def readAndInsertMainlineCounts(self,
                                    file,
                                    primary_street,
                                    cross_street1,
                                    cross_street2,
                                    user,
                                    logger,
                                    tzinfo=None):
        """
        Parses the given excel file representing mainline counts and inserts those counts into the countdracula database.
 
        * *file* is the Excel workbook file name
        * *primary_street* is the street on which the counts were taken
        * *cross_street1* and *cross_street2* are the bounding cross streets
        * *user* is the django User to associate with the count
        * *logger* is a logging instance
        
        On success, returns number of successful counts inserted.
        
        On failure, removes all counts from this workbook so it can be fixed and inserted again, and returns -1.
        """
        # logger.info("primary_street=[%s], cross_street1=[%s] cross_street2=[%s]" % (primary_street, cross_street1, cross_street2))
        mainline_counts = MainlineCount.objects.filter(sourcefile=file)
        if len(mainline_counts) > 0:
            logger.error("  readAndInsertMainlineCounts() called on %s, but %d mainline counts already "\
                        "exist with that sourcefile.  Skipping." % (file, len(mainline_counts)))
            return -1

        try:

            primary_street_list = StreetName.getPossibleStreetNames(
                primary_street)
            if len(primary_street_list) == 0:
                raise CountsWorkbookParserException(
                    "readMainlineCounts: primary street %s not found." %
                    primary_street)

            cross_street1_list = StreetName.getPossibleStreetNames(
                cross_street1)
            if len(cross_street1_list) == 0:
                raise CountsWorkbookParserException(
                    "readMainlineCounts: cross street 1 %s not found." %
                    cross_street1)

            cross_street2_list = StreetName.getPossibleStreetNames(
                cross_street2)
            if len(cross_street2_list) == 0:
                raise CountsWorkbookParserException(
                    "readMainlineCounts: cross street 2 %s not found." %
                    cross_street2)

            # looking for a primary street that intersects with both one of cross_street1 cross_Street2
            # collect this info in two dictionaries:
            #  { primary_street_name (StreetName instance) -> { cross_street1_name (StreetName instance) -> QuerySet of Node instances }}
            #  { primary_street_name (StreetName instance) -> { cross_street2_name (StreetName instance) -> QuerySet of Node instances }}
            intersections1 = {}
            intersections2 = {}

            for primary_street_name in primary_street_list:
                intersections1[primary_street_name] = {}
                intersections2[primary_street_name] = {}

                for cross_street1_name in cross_street1_list:

                    intersections1[primary_street_name][cross_street1_name] = Node.objects.filter(street_to_node__street_name=primary_street_name) \
                                                                                          .filter(street_to_node__street_name=cross_street1_name)
                    # don't bother if it's an empty set
                    if len(intersections1[primary_street_name]
                           [cross_street1_name]) == 0:
                        del intersections1[primary_street_name][
                            cross_street1_name]

                for cross_street2_name in cross_street2_list:
                    intersections2[primary_street_name][cross_street2_name] = Node.objects.filter(street_to_node__street_name=primary_street_name) \
                                                                                          .filter(street_to_node__street_name=cross_street2_name)
                    # don't bother if it's an empty set
                    if len(intersections2[primary_street_name]
                           [cross_street2_name]) == 0:
                        del intersections2[primary_street_name][
                            cross_street2_name]

            # ideally, there will be exactly one primary street with a cross street 1 candidate and a cross street 2 candidate
            primary_street_name_final = None
            for primary_street_name in primary_street_list:
                if len(intersections1[primary_street_name]) == 0: continue
                if len(intersections2[primary_street_name]) == 0: continue

                if len(intersections1[primary_street_name]) > 1:
                    raise CountsWorkbookParserException(
                        "readMainlineCounts: Street %s and cross street 1 %s have multiple intersections: %s"
                        % (primary_street_name, cross_street1,
                           str(intersections1[primary_street_name])))
                if len(intersections2[primary_street_name]) > 1:
                    raise CountsWorkbookParserException(
                        "readMainlineCounts: Street %s and cross street 2 %s have multiple intersections: %s"
                        % (primary_street_name, cross_street2,
                           str(intersections2[primary_street_name])))
                # already found one?
                if primary_street_name_final:
                    raise CountsWorkbookParserException(
                        "readMainlineCounts: Multiple primary streets (%s,%s) intersect with %s/%s"
                        % (primary_street_name, primary_street_name_final,
                           cross_street1, cross_street2))
                primary_street_name_final = primary_street_name

            if not primary_street_name_final:
                raise CountsWorkbookParserException(
                    "readMainlineCounts: Street %s and cross streets %s,%s have no intersections: %s %s"
                    % (primary_street, str(cross_street1_list),
                       str(cross_street2_list), str(intersections1),
                       str(intersections2)))

            # finalize the cross street names and intersection ids
            cross_street1_name_final = intersections1[
                primary_street_name_final].keys()[0]
            cross_street2_name_final = intersections2[
                primary_street_name_final].keys()[0]

            # go through the sheets and read the data
            book = xlrd.open_workbook(file)
            sheetnames = book.sheet_names()
            counts_saved = 0

            for sheet_idx in range(len(sheetnames)):

                if sheetnames[sheet_idx].lower() == "source": continue
                activesheet = book.sheet_by_name(sheetnames[sheet_idx])

                # Create date from sheetname in date format
                tmp_date = sheetnames[sheet_idx].split('.')
                date_yyyy_mm_dd = date(int(tmp_date[0]), int(tmp_date[1]),
                                       int(tmp_date[2]))

                project = ""
                sections = self.findSectionStarts(activesheet, logger)
                # loop through the different sections in the workbook
                for section in sections:

                    # project section?
                    if activesheet.cell_value(section[0],
                                              0).upper() == "PROJECT":
                        project = activesheet.cell_value(section[0] + 1, 0)
                        continue

                    # figure out the vehicle type and row with the column labels
                    assert (activesheet.cell_value(section[0] + 1,
                                                   0).upper() == "MAINLINE")
                    label_row = section[0] + 1

                    # figure out the vehicle type code
                    vehicle = activesheet.cell_value(section[0], 0)
                    if type(vehicle) in [types.FloatType, types.IntType
                                         ] and vehicle in range(16):
                        vtype = vehicle
                    elif type(vehicle) in [
                            types.StringType, types.UnicodeType
                    ]:
                        vtype = self.vehicleTypeForString(vehicle)
                    else:
                        vtype = 0  #TODO: fix
                    logger.info("  Worksheet %20s Vehicle=%2d %s" %
                                (sheetnames[sheet_idx], vtype, vehicle))

                    for column in range(
                            1, self.numNonBlankColumns(activesheet,
                                                       label_row)):

                        # Read the label header
                        ml_ondir_temp = activesheet.cell_value(
                            label_row, column)
                        ml_ondir = ml_ondir_temp[:2]
                        direction = ml_ondir[0]

                        # The convention is that cross street 1 is always north or west of cross street 2
                        # so use this cue to determine the origin/destination of the movement
                        if (direction == 'S' or direction == 'E'):
                            ml_fromstreet = cross_street1_name_final
                            ml_fromint = intersections1[
                                primary_street_name_final][ml_fromstreet][0]
                            ml_tostreet = cross_street2_name_final
                            ml_toint = intersections2[
                                primary_street_name_final][ml_tostreet][0]
                        else:
                            ml_fromstreet = cross_street2_name_final
                            ml_fromint = intersections2[
                                primary_street_name_final][ml_fromstreet][0]
                            ml_tostreet = cross_street1_name_final
                            ml_toint = intersections1[
                                primary_street_name_final][ml_tostreet][0]

                        # look for the mainline count location in countdracula
                        try:
                            mainline_count_location = MainlineCountLocation.objects.get(
                                from_int=ml_fromint,
                                to_int=ml_toint,
                                on_street=primary_street_name_final,
                                on_dir=ml_ondir)
                        except ObjectDoesNotExist:
                            mainline_count_location = MainlineCountLocation(
                                on_street=primary_street_name_final,
                                on_dir=ml_ondir,
                                from_street=ml_fromstreet,
                                from_int=ml_fromint,
                                to_street=ml_tostreet,
                                to_int=ml_toint)
                            mainline_count_location.save()

                        # process the rows
                        for row in range(section[0] + 2, section[1] + 1):

                            count = activesheet.cell_value(row, column)
                            if count == "": continue

                            (starttime, period) = self.createtimestamp(
                                activesheet.cell_value(row, 0), tzinfo=tzinfo)

                            mainline_count = MainlineCount(
                                location=mainline_count_location,
                                count=count,
                                count_date=date_yyyy_mm_dd,
                                start_time=starttime,
                                period_minutes=period,
                                vehicle_type=vtype,
                                reference_position=
                                -1,  # reference position unknown, it's not in the workbook
                                sourcefile=file,
                                project=project,
                                upload_user=user)
                            mainline_count.clean()
                            mainline_count.save()
                            counts_saved += 1

            logger.info("  Processed %s into countdracula" % file)
            logger.info("  Successfully saved %4d mainline counts" %
                        counts_saved)
            return counts_saved

        except Exception as e:
            logger.error("  Failed to process %s" % file)
            logger.error("  " + str(e))
            logger.error("  " + traceback.format_exc())

            # remove the rest of the counts for this sourcefile so it can be retried
            mainline_counts = MainlineCount.objects.filter(sourcefile=file)
            if len(mainline_counts) > 0:
                logger.debug(
                    "  Removing %d counts from countdracula so sourcefile %s can be reprocessed"
                    % (len(mainline_counts), file))
                mainline_counts.delete()
            else:
                logger.debug(
                    "  No counts to remove for sourcefile %s; sourcefile can be reprocessed"
                    % file)

            return -1
        elif pems_dir == "N": pems_dir = "NB"
        elif pems_dir == "E": pems_dir = "EB"
        elif pems_dir == "W": pems_dir = "WB"

        # look for the mainline count location
        try:
            mainline_count_location = MainlineCountLocation.objects.get(
                from_int=pemsid_to_cdlocation[pems_key][2],
                to_int=pemsid_to_cdlocation[pems_key][4],
                on_street=pemsid_to_cdlocation[pems_key][0],
                on_dir=pems_dir)
        except ObjectDoesNotExist:
            mainline_count_location = MainlineCountLocation(
                on_street=pemsid_to_cdlocation[pems_key][0],
                on_dir=pems_dir,
                from_street=pemsid_to_cdlocation[pems_key][1],
                from_int=pemsid_to_cdlocation[pems_key][2],
                to_street=pemsid_to_cdlocation[pems_key][3],
                to_int=pemsid_to_cdlocation[pems_key][4])
            mainline_count_location.save()

        # required fields for insertMainlineCounts: count, starttime, period, vtype,
        #      onstreet, ondir, fromstreet, tostreet, refpos, sourcefile, project

        pems_date_fields = pems_date.split(r"/")
        pems_time_fields = pems_time.split(r":")
        count_date = datetime.date(year=int(pems_date_fields[2]),
                                   month=int(pems_date_fields[0]),
                                   day=int(pems_date_fields[1]))
        starttime = datetime.time(hour=int(pems_time_fields[0]),
                                  minute=int(pems_time_fields[1]),
def readCensusCounts(mapping, census_dirname, user):
    """
    Reads the census station count workbooks and inputs those counts into the Count Dracula database.
    """
    filenames = sorted(os.listdir(census_dirname))
    for filename in filenames:

        if filename[-4:] != ".xls":
            logger.debug("Skipping non-xls file %s" % filename)
            continue

        logger.info("Processing PeMS Census file %s" % filename)

        filename_parts = filename[:-4].split("_")
        pems_id = filename_parts[0]
        pems_dir = pems_id[-2:].upper()

        pems_id = pems_id.replace("nb", "N")
        pems_id = pems_id.replace("sb", "S")

        pems_key = ("Census", pems_id)
        if pems_key not in mapping:
            logger.debug(
                "Couldn't find %s in mapping; don't care about this VDS. Skipping."
                % str(pems_key))
            continue

        try:
            intersection = getIntersectionStreetnamesForPemsKey(
                mapping, pems_key)
            logger.debug("%20s -> %s" % (str(pems_key), str(intersection)))
        except Exception, e:
            logger.error(e)
            continue

        # look for the mainline count location in countdracula
        try:
            mainline_count_location = MainlineCountLocation.objects.get(
                from_int=intersection[2],
                to_int=intersection[4],
                on_street=intersection[0],
                on_dir=pems_dir)
        except ObjectDoesNotExist:
            mainline_count_location = MainlineCountLocation(
                on_street=intersection[0],
                on_dir=pems_dir,
                from_street=intersection[1],
                from_int=intersection[2],
                to_street=intersection[3],
                to_int=intersection[4])
            mainline_count_location.save()

        workbook_filename = os.path.join(census_dirname, filename)
        book = xlrd.open_workbook(workbook_filename)

        # open the workbook
        assert ("Report Data"
                in book.sheet_names())  # standard PeMS sheetnames
        datasheet = book.sheet_by_name("Report Data")
        counts_saved = 0

        # for each day
        for col in range(1, len(datasheet.row(0))):
            pems_date = xlrd.xldate_as_tuple(datasheet.cell_value(0, col),
                                             book.datemode)

            # for each time
            for row in range(1, len(datasheet.col(0))):
                pems_time = xlrd.xldate_as_tuple(datasheet.cell_value(row, 0),
                                                 book.datemode)

                count_date = datetime.date(year=int(pems_date[0]),
                                           month=int(pems_date[1]),
                                           day=int(pems_date[2]))
                starttime = datetime.time(hour=int(pems_time[3]),
                                          minute=int(pems_time[4]),
                                          second=0)
                # tzinfo=TIMEZONE)

                count = datasheet.cell_value(row, col)
                if count == "": continue  # skip blanks
                if count == 0.0:
                    continue  # skip zeros, they aren't real zero counts
                project_str = "PeMS Census %s - %s" % (pems_id,
                                                       mapping[pems_key][3])

                # read the counts
                mainline_count = MainlineCount(
                    location=mainline_count_location,
                    count=count,
                    count_date=count_date,
                    start_time=starttime,
                    period_minutes=60,
                    vehicle_type=0,  # ALL
                    reference_position=-1,
                    sourcefile=workbook_filename,
                    project=project_str,
                    upload_user=user)
                mainline_count.clean()
                mainline_count.save()
                counts_saved += 1

        del book
        logger.info("Saved %3d census counts from %s into countdracula" %
                    (counts_saved, workbook_filename))
Exemple #4
0
            logger.info(
                "Mainline count on_street=%s on_dir=%s from_street=%s to_street=%s"
                % (on_street, on_dir, from_street, to_street))

            try:
                mainline_count_location = MainlineCountLocation.objects.get(
                    on_street=on_street,
                    on_dir=on_dir,
                    from_int=A_node,
                    to_int=B_node)
            except ObjectDoesNotExist:
                mainline_count_location = MainlineCountLocation(
                    on_street=on_street,
                    on_dir=on_dir,
                    from_street=from_street,
                    from_int=A_node,
                    to_street=to_street,
                    to_int=B_node)
                mainline_count_location.save()

            for colname in [
                    'EA_OBS', 'AM_OBS', 'MD_OBS', 'PM_OBS', 'EV_OBS', 'TOT_OBS'
            ]:
                count = decimal.Decimal(
                    datasheet.cell_value(row, colname_to_colnum[colname]))

                if count == 0: continue  # zeros are not real

                mainline_count = MainlineCount(
                    location=mainline_count_location,