def _main(osm, auth, sections):

    group = Group(osm, auth, MAPPING.keys(), None)

    for section in sections:
        assert section in group.SECTIONIDS.keys(), \
            "section must be in {!r}.".format(group.SECTIONIDS.keys())

    contacts = []

    for section in sections:
        section_contacts = [member2contacts(member, section) for
                          member in group.section_all_members(section)]

        #  flatten list of lists.
        contacts += list(itertools.chain(*section_contacts))

    # Remove blank emails
    contacts = [contact for contact in contacts if contact[2].strip() != ""]

    # remove duplicates
    by_email = {contact[2]: contact for contact in contacts}
    contacts = list(by_email.values())

    w = csv_writer(sys.stdout)
    w.writerows(contacts)
Example #2
def member_badges(osm, auth, firstname, lastname, csv=False, no_headers=False, term=None):
    group = Group(osm, auth, MAPPING.keys(), term)

    members = group.find_by_name(firstname, lastname)
    # member = members[-1]
    rows = []
    for member in members:
        for section_type in ('beavers', 'cubs', 'scouts'):
            try:
                badges = member.get_badges(section_type=section_type)
                if badges is not None:
                    for badge in [_ for _ in badges if _['awarded'] == '1']:
                        rows.append([member['date_of_birth'], member['last_name'],
                                     member['age'], section_type, member._section['sectionname'],
                                     badge['badge'],
                                     datetime.date.fromtimestamp(int(badge['awarded_date'])).isoformat()])
            except Exception:
                import traceback
                traceback.print_exc()

    headers = ["DOB", "Last Name", "Age", "Section Type", "Section Name", "Badge"]

    if csv:
        w = csv_writer(sys.stdout)
        if not no_headers:
            w.writerow(list(headers))
        w.writerows(rows)
    else:
        if not no_headers:
            print(tabulate.tabulate(rows, headers=headers))
        else:
            print(tabulate.tabulate(rows, tablefmt="plain"))
def main():
    for infilename in sys.argv[1:]:
        outfilename = sub(r"\.csv", "_pad.csv", infilename)
        prev_dt = -1
        week = timedelta(days=7)
        one = timedelta(days=1)
        with open(outfilename, "wb") as outfile:
            w = csv_writer(outfile)
            with open(infilename, "rb") as infile:
                r = csv_reader(infile)
                header = r.next()
                w.writerow(header)
                for row in r:
                    dt = dt_parser.parse(row[0])
                    if prev_dt != -1:
                        # we're past the first line... compare!
                        diff = dt - prev_dt
                        if diff > one:
                            for i in reversed(range(diff.days - 1)):
                                wahoo = timedelta(days=(i+1))
                                pad = dt - wahoo
                                #print >> sys.stderr, "padding:%s" % pad
                                w.writerow([_get_dt_str(pad), 0])
                    w.writerow([_get_dt_str(dt), row[1]])
                    prev_dt = dt
Example #4
def CreateConstMuskingumXFile(x_value,
                              in_connectivity_file,
                              out_x_file):
    """
    Create a Muskingum X file using a single constant value for every river segment.
    
    Args:
        x_value(float): Value for the muskingum X parameter [0-0.5].
        in_connectivity_file(str): The path to the RAPID connectivity file.
        out_x_file(str): The path to the output x file.
    
    Example::
    
        from RAPIDpy.gis.muskingum import CreateConstMuskingumXFile
        #------------------------------------------------------------------------------
        #main process
        #------------------------------------------------------------------------------
        if __name__ == "__main__":
            CreateConstMuskingumXFile(x_value=0.3,
                                      in_connectivity_file='/path/to/rapid_connect.csv',
                                      out_x_file='/path/to/x.csv',
                                      )
    """
    num_rivers = 0
    with open_csv(in_connectivity_file, "r") as csvfile:
        reader = csv_reader(csvfile)
        for row in reader:
            num_rivers+=1

    with open_csv(out_x_file,'w') as kfile:
        x_writer = csv_writer(kfile)
        for idx in xrange(num_rivers):
            x_writer.writerow([x_value])    
Example #5
def StreamIDNextDownIDToConnectivity(stream_id_array,
                                     next_down_id_array,
                                     out_csv_file):
    """
    Creates RAPID connect file from stream_id array and next down id array
    """
    list_all = []
    max_count_upstream = 0

    for hydroid in np.sort(stream_id_array):
        # find the HydroID of the upstreams
        list_upstreamID = stream_id_array[next_down_id_array==hydroid]
        # count the total number of the upstreams
        count_upstream = len(list_upstreamID)
        if count_upstream > max_count_upstream:
            max_count_upstream = count_upstream
        nextDownID = next_down_id_array[stream_id_array==hydroid][0]
#THIS IS REMOVED DUE TO THE FACT THAT THERE ARE STREAMS WITH ID OF ZERO
#        # replace the nextDownID with 0 if it equals to -1 (no next downstream)
#        if nextDownID == -1:
#            nextDownID = 0
        # append the list of Stream HydroID, NextDownID, Count of Upstream ID, and  HydroID of each Upstream into a larger list
        list_all.append(np.concatenate([np.array([hydroid,nextDownID,count_upstream]),list_upstreamID]).astype(int))

    with open_csv(out_csv_file,'w') as csvfile:
        connectwriter = csv_writer(csvfile)
        for row_list in list_all:
            out = np.concatenate([row_list, np.array([0 for i in xrange(max_count_upstream - row_list[2])])])
            connectwriter.writerow(out.astype(int))
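A minimal usage sketch for StreamIDNextDownIDToConnectivity; the river IDs and the output path below are made up for illustration, and -1 is assumed to mark the basin outlet:

import numpy as np

stream_ids = np.array([1, 2, 3])
next_down_ids = np.array([3, 3, -1])  # rivers 1 and 2 drain into river 3
StreamIDNextDownIDToConnectivity(stream_ids, next_down_ids, 'rapid_connect.csv')
# Each output row is [stream_id, next_down_id, upstream_count, upstream_ids...],
# zero-padded to the widest row, e.g. river 3 becomes "3,-1,2,1,2".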
Example #6
def CreateMuskingumKFile(lambda_k,
                         in_kfac_file,
                         out_k_file):
    """
    Creates muskingum k file from kfac file.
    
    Args:
        lambda_k(float): The value for lambda given from RAPID after the calibration process. If no calibration has been performed, 0.35 is reasonable.
        in_kfac_file(str): The path to the input kfac file.
        out_k_file(str): The path to the output k file.
    
    Example::
    
        from RAPIDpy.gis.muskingum import CreateMuskingumKFile
        #------------------------------------------------------------------------------
        #main process
        #------------------------------------------------------------------------------
        if __name__ == "__main__":
            CreateMuskingumKFile(lambda_k=0.35,
                                 in_kfac_file='/path/to/kfac.csv',
                                 out_k_file='/path/to/k.csv',
                                 )
    """
    kfac_table = csv_to_list(in_kfac_file)
    
    with open_csv(out_k_file,'w') as kfile:
        k_writer = csv_writer(kfile)
        for row in kfac_table:
            k_writer.writerow([lambda_k * float(row[0])])
Example #7
 def to_csv(self):
     buf = BytesIO()
     w = csv_writer(buf)
     w.writerow([_('Style Rule'), _('Number of matches')])
     for r in xrange(self.proxy.rowCount()):
         entry = self.proxy.mapToSource(self.proxy.index(r, 0)).data(Qt.UserRole)
         w.writerow([entry.rule.selector, entry.count])
     return buf.getvalue()
Example #8
 def to_csv(self):
     buf = BytesIO()
     w = csv_writer(buf)
     w.writerow([_('Class'), _('Number of matches')])
     for r in xrange(self.proxy.rowCount()):
         entry = self.proxy.mapToSource(self.proxy.index(r, 0)).data(Qt.UserRole)
         w.writerow([entry.cls, entry.num_of_matches])
     return buf.getvalue()
Example #9
 def export_csv(self):
     out = StringIO()
     csv = csv_writer(out)
     attributes = self.attributes()
     headers = ["When"] + attributes
     csv.writerow(headers)
     for entry in self.entries():
         csv.writerow([entry.when] + [entry.attrs.get(x, "") for x in attributes])
     return out.getvalue()
Example #10
 def to_csv(self):
     buf = BytesIO()
     w = csv_writer(buf)
     w.writerow(self.proxy.sourceModel().COLUMN_HEADERS)
     cols = self.proxy.columnCount()
     for r in xrange(self.proxy.rowCount()):
         items = [self.proxy.index(r, c).data(Qt.DisplayRole) for c in xrange(cols)]
         w.writerow(items)
     return buf.getvalue()
Example #11
def write_csv(csv_file, rows, encoding=r'UTF-8'):
    from csv import writer as csv_writer
    from os import rename
    work = csv_file + '-'
    with open(work, r'wt', encoding=encoding) as ostream:
        w = csv_writer(ostream)
        for row in rows:
            w.writerow(row)
    rename(work, csv_file)
Example #12
def compress_csv(in_path, n, out_path):
    """Compresses n csv rows into one"""
    header, rows = csv_elems(in_path)
    new_header = duplicate_header(header, n)
    new_rows = group_rows(rows, n)
    with open(out_path, 'w') as out_file:
        writer = csv_writer(out_file, delimiter=',', quotechar='"')
        writer.writerow(new_header)
        for row in new_rows:
            writer.writerow(row)
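A hypothetical call to compress_csv, assuming 'raw.csv' exists and that the csv_elems, duplicate_header and group_rows helpers are importable in this scope:

# Collapse every 3 consecutive rows of raw.csv into one wider row.
compress_csv('raw.csv', 3, 'compressed.csv')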
	def saveCSV(self):
		print "[Birthday Reminder] exporting CSV file", CSVFILE
		try:
			csvFile = open(CSVFILE, "wb")
			writer = csv_writer(csvFile)
			writer.writerows(self.birthdaytimer.getBirthdayList())
			csvFile.close()
			self.session.open(MessageBox, _("Wrote CSV file %s.") % CSVFILE, MessageBox.TYPE_INFO)
		except:
			self.session.open(MessageBox, _("Can't write CSV file %s.") % CSVFILE, MessageBox.TYPE_ERROR)
Example #14
def serialize_csv_trace_obsels(graph, resource, bindings=None):
    sio = StringIO()
    csvw = csv_writer(sio)
    for row in iter_csv_rows(resource.trace.uri, graph):
        csvw.writerow([ i.encode('utf-8') for i in row ])
        # immediately yield each line
        yield sio.getvalue()
        # then empty sio before writing next line
        sio.reset()
        sio.truncate()
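The same write-yield-rewind streaming idea, sketched for Python 3 (not part of the original module; reset() is a cStringIO method that io.StringIO lacks, so seek(0) plus truncate(0) is used instead):

from csv import writer as csv_writer
from io import StringIO

def stream_csv_rows(rows):
    buf = StringIO()
    csvw = csv_writer(buf)
    for row in rows:
        csvw.writerow(row)
        # yield the freshly written line, then rewind and empty the buffer
        yield buf.getvalue()
        buf.seek(0)
        buf.truncate(0)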
Example #15
def FlowlineToPoint(in_drainage_line,
                    river_id,
                    out_csv_file,
                    file_geodatabase=None):
    """
    Converts flowline feature to a list of centroid points with their comid in EPSG:4326.

    Args:
        in_drainage_line(str): Path to the stream network (i.e. Drainage Line) shapefile.
        river_id(str): The name of the field with the river ID (Ex. 'HydroID', 'COMID', or 'LINKNO').
        out_csv_file(str): Path to the output csv file with the centroid points.
        file_geodatabase(Optional[str]): Path to the file geodatabase. If you use this option, in_drainage_line is the name of the stream network feature class. (WARNING: Not always stable with GDAL.)
    
    Example::
    
        from RAPIDpy.gis.centroid import FlowlineToPoint
        #------------------------------------------------------------------------------
        #main process
        #------------------------------------------------------------------------------
        if __name__ == "__main__":
            FlowlineToPoint(in_drainage_line='/path/to/drainageline.shp',
                            river_id='LINKNO',
                            out_csv_file='/path/to/comid_lat_lon_z.csv',
                            )
    
    """

    if file_geodatabase:
        gdb_driver = ogr.GetDriverByName("OpenFileGDB")
        ogr_file_geodatabase = gdb_driver.Open(file_geodatabase, 0)
        ogr_drainage_line_shapefile_lyr = ogr_file_geodatabase.GetLayer(in_drainage_line)
    else:
        ogr_drainage_line_shapefile = ogr.Open(in_drainage_line)
        ogr_drainage_line_shapefile_lyr = ogr_drainage_line_shapefile.GetLayer()
    
    ogr_drainage_line_shapefile_lyr_proj = ogr_drainage_line_shapefile_lyr.GetSpatialRef()
    osr_geographic_proj = osr.SpatialReference()
    osr_geographic_proj.ImportFromEPSG(4326)
    proj_transform = None
    if ogr_drainage_line_shapefile_lyr_proj != osr_geographic_proj:
        proj_transform = osr.CoordinateTransformation(ogr_drainage_line_shapefile_lyr_proj, osr_geographic_proj)

    #print valid field names to table
    with open_csv(out_csv_file, 'w') as outfile:
        writer = csv_writer(outfile)
        writer.writerow(['rivid','lat','lon','z'])
        for feature in ogr_drainage_line_shapefile_lyr:
            feat_geom = feature.GetGeometryRef()
            if proj_transform:
                feat_geom.Transform(proj_transform)
            centroid = feat_geom.Centroid()
            centroid_pt = centroid.GetPoint(0)
            writer.writerow([feature.GetField(river_id), centroid_pt[1], centroid_pt[0], centroid_pt[2]])
def main():
    req = requests.get('http://summaries.cochrane.org/search/site/chocolate')

    soup = BeautifulSoup(req.text)

    categories = get_categories_for_search(soup)

    filename = 'categories.csv'
    with open(filename, 'w') as csv_file:
        for category in categories:
            spamwriter = csv_writer(csv_file)
            spamwriter.writerow([category])
Example #17
def parse_results(in_path, out_path, threshold, hedge_path):
    """Parse results csv file from CrowdFlower"""
    hedge_types = load_hedge_types(hedge_path)
    hits = load_units(in_path)
    with open(out_path, 'w') as csv_file:
        writer = csv_writer(csv_file, delimiter='\t', quotechar='\"')
        writer.writerow(['segment', 'proposition', 'belief_type'])
        for hedge, sent, judgement, confidence in hits:
            if confidence > threshold:
                if judgement == 'yes':
                    writer.writerow([change_tag(sent), hedge, hedge_types[hedge]])
                else:
                    writer.writerow([change_tag(sent), hedge, 'NH'])
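A hypothetical invocation of parse_results, assuming the CrowdFlower export lives in 'results.csv' and the hedge lexicon in 'hedges.csv' (both paths are illustrative):

# Keep only judgements whose confidence exceeds 0.6 and write a
# tab-separated segment / proposition / belief_type file.
parse_results('results.csv', 'belief_types.tsv', 0.6, 'hedges.csv')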
Example #18
def list_to_csv(ls, path, hedge_path):
    hedge_types = load_hedge_types(hedge_path)
    with open(path, 'w') as csv_file:
        writer = csv_writer(csv_file, delimiter='\t', quotechar='"')
        writer.writerow([
            'segment',
            'proposition',
            'belief_type'])
        for unit, judgements in ls:
            hedge, sent = unit
            yes, no = judgements['TRUE'], judgements['FALSE']
            if yes > no:
                writer.writerow([change_tag(sent), hedge, hedge_types[hedge]])
            else:
                writer.writerow([change_tag(sent), hedge, 'NH'])
Example #19
def movers_list(osm, auth, sections, age=None, term=None,
                csv=False, no_headers=False):
    group = Group(osm, auth, MAPPING.keys(), term)

    rows = []

    for section in sections:
        section_ = group._sections.sections[Group.SECTIONIDS[section]]

        headers = ['firstname', 'lastname', 'real_age', 'dob',
                   "Date Parents Contacted", "Parents Preference",
                   "Date Leaders Contacted", "Agreed Section",
                   "Starting Date", "Leaving Date", "Notes", "Priority",
                   '8', '10 1/2', '14 1/2']

        movers = section_.movers

        if age:
            threshold = (365 * float(age))
            now = datetime.datetime.now()
            age_fn = lambda dob: (now - datetime.datetime.strptime(dob, '%Y-%m-%d')).days

            movers = [mover for mover in section_.movers
                      if age_fn(mover['dob']) > threshold]

        now = datetime.datetime.now()
        for mover in movers:
            real_dob = datetime.datetime.strptime(mover['dob'], '%Y-%m-%d')
            rel_age = relativedelta.relativedelta(now, real_dob)
            mover['real_age'] = "{0:02d}.{1:02d}".format(rel_age.years, rel_age.months)
            mover['8'] = (real_dob+relativedelta.relativedelta(years=8)).strftime("%b %y")
            mover['10 1/2'] = (real_dob + relativedelta.relativedelta(years=10, months=6)).strftime("%b %y")
            mover['14 1/2'] = (real_dob + relativedelta.relativedelta(years=14, months=6)).strftime("%b %y")

        rows += [[section_['sectionname']] +
                 [member[header] for header in headers]
                  for member in movers]

    headers = ["Current Section"] + headers

    if csv:
        w = csv_writer(sys.stdout)
        if not no_headers:
            w.writerow(list(headers))
        w.writerows(rows)
    else:
        if not no_headers:
            print(tabulate.tabulate(rows, headers=headers))
        else:
            print(tabulate.tabulate(rows, tablefmt="plain"))
Example #20
 def process(self, files):
     writer = csv_writer(self.fout, dialect=excel)
     
     # Write the header
     writer.writerow(self.fields)
     
     for banner in iterate_files(files):
         try:
             row = []
             for field in self.fields:
                 value = self.banner_field(banner, field)
                 row.append(value)
             writer.writerow(row)
         except Exception:
             pass
Example #21
def sightings_to_csv(since=None, output='sightings.csv'):
    from csv import writer as csv_writer

    if since:
        conf.REPORT_SINCE = since
    with session_scope() as session:
        sightings = get_sightings_per_pokemon(session)
    od = OrderedDict()
    for pokemon_id in range(1, 252):
        if pokemon_id not in sightings:
            od[pokemon_id] = 0
    od.update(sightings)
    with open(output, 'wt') as csvfile:
        writer = csv_writer(csvfile)
        writer.writerow(('pokemon_id', 'count'))
        for item in od.items():
            writer.writerow(item)
Example #22
    def process(self, files):
        writer = csv_writer(self.fout, dialect=excel)

        # Write the header
        writer.writerow(self.fields)

        for banner in iterate_files(files):
            # The "vulns" property can't be nicely flattened as-is so we turn
            # it into a list before processing the banner.
            if 'vulns' in banner:
                banner['vulns'] = banner['vulns'].keys()

            try:
                row = []
                for field in self.fields:
                    value = self.banner_field(banner, field)
                    row.append(value)
                writer.writerow(row)
            except Exception:
                pass
Example #23
def contacts_detail(osm, auth, sections, csv=False, term=None, no_headers=False):
    group = Group(osm, auth, MAPPING.keys(), term)
    section_map = {'Garrick': 'Beavers',
                   'Paget': 'Beavers',
                   'Swinfen': 'Beavers',
                   'Maclean': 'Cubs',
                   'Somers': 'Cubs',
                   'Rowallan': 'Cubs',
                   'Erasmus': 'Scouts',
                   'Boswell': 'Scouts',
                   'Johnson': 'Scouts'}
    rows = []

    def add_row(section, member):
        rows.append([section_map[section], section, member['first_name'], member['last_name'],
                     member['date_of_birth'],
                     member['contact_primary_1.email1'],
                     member['contact_primary_1.address1'],
                     member['contact_primary_1.address2'],
                     member['contact_primary_1.address3'],
                     member['contact_primary_1.postcode'],
                     member['contact_primary_2.address1'],
                     member['floating.gender'].lower()])

    for section in sections:
        for member in group.section_all_members(section):
            add_row(section, member)

    headers = ["Section", "Section Name", "First", "Last", "DOB", "Email1", "Address1", "Address1.1", "Address1.2", "Address1.3",
               "Address2", "Address3", "Gender"]

    if csv:
        w = csv_writer(sys.stdout)
        if not no_headers:
            w.writerow(list(headers))
        w.writerows(rows)
    else:
        if not no_headers:
            print(tabulate.tabulate(rows, headers=headers))
        else:
            print(tabulate.tabulate(rows, tablefmt="plain"))
Example #24
def events_attendees(osm, auth, sections, event,
                     term=None, csv=False, attending_only=False,
                     no_headers=False):
    group = Group(osm, auth, MAPPING.keys(), term)

    for section in sections:
        section_ = group._sections.sections[Group.SECTIONIDS[section]]
        ev = section_.events.get_by_name(event)
        if not ev:
            log.error("No such event: {}".format(event))
            sys.exit(0)
        attendees = ev.attendees
        mapping = ev.fieldmap
        if attending_only:
            attendees = [attendee for attendee in attendees
                         if attendee['attending'] == "Yes"]

        extra_fields = {
            'patrol': 'Six',
            'age': 'Age',
        }

        def fields(attendee):
            out = [str(attendee[_[1]]) for _ in mapping] + \
                  [section_.members.get_by_event_attendee(attendee)[_] for _ in
                   extra_fields.keys()]
            return out

        output = [fields(attendee)
                  for attendee in attendees if section_.members.is_member(attendee['scoutid'])]
        headers = [_[0] for _ in mapping] + list(extra_fields.values())
        if csv:
            w = csv_writer(sys.stdout)
            if not no_headers:
                w.writerow(list(headers))
            w.writerows(output)
        else:
            if not no_headers:
                print(tabulate.tabulate(output, headers=headers))
            else:
                print(tabulate.tabulate(output, tablefmt="plain"))
Example #25
def csv(request):
    # Create the HttpResponse object with the appropriate CSV header.
    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="quantify.csv"'

    writer = csv_writer(response, delimiter=';', quotechar='"', quoting=QUOTE_NONNUMERIC)

    cols = ['Date']
    for field in Field.objects.all():
        cols.append(field.name.encode('utf-8'))
    writer.writerow(cols)

    for entry in Entry.objects.all():
        row = [entry.date]
        for col in cols[1:]:
            try:
                row.append(entry.records.get(field__name=col).value)
            except Record.DoesNotExist:
                row.append('')
        writer.writerow(row)

    return response
Example #26
def members_badges(osm, auth, sections, csv=False, no_headers=False, term=None):
    group = Group(osm, auth, MAPPING.keys(), term)

    for section in sections:
        # members = group._sections.sections[Group.SECTIONIDS[section]].members
        members = group.section_yp_members_without_leaders(section)
        rows = []
        for member in members:
            badges = member.get_badges(section_type=group.SECTION_TYPE[section])
            if badges:
                # If no badges - probably a leader
                challenge_new = len([badge for badge in badges
                                     if badge['awarded'] == '1' and badge['badge_group'] == '1'
                                     and not badge['badge'].endswith('(Pre 2015)')])
                challenge_old = len([badge for badge in badges
                                     if badge['awarded'] == '1' and badge['badge_group'] == '1'
                                     and badge['badge'].endswith('(Pre 2015)')])

                activity = len([badge for badge in badges if badge['awarded'] == '1' and badge['badge_group'] == '2'])
                staged = len([badge for badge in badges if badge['awarded'] == '1' and badge['badge_group'] == '3'])
                core = len([badge for badge in badges if badge['awarded'] == '1' and badge['badge_group'] == '4'])

                rows.append([member['date_of_birth'], member['last_name'], member['age'], section,
                             challenge_new, challenge_old, activity, staged, core])

        headers = ["DOB", "Last Name", "Age", "Section Name", "Challenge", "Challenge_old", "Staged", "Activity", "Core"]

        if csv:
            w = csv_writer(sys.stdout)
            if not no_headers:
                w.writerow(list(headers))
            w.writerows(rows)
        else:
            if not no_headers:
                print(tabulate.tabulate(rows, headers=headers))
            else:
                print(tabulate.tabulate(rows, tablefmt="plain"))
Example #27
def CreateMuskingumXFileFromDranageLine(in_drainage_line,
                                        x_id,
                                        out_x_file,
                                        file_geodatabase=None):
    """
    Create muskingum X file from drainage line.

    Args:
        in_drainage_line(str): Path to the stream network (i.e. Drainage Line) shapefile.
        x_id(str): The name of the Muskingum X field (i.e. 'Musk_x').
        out_x_file(str): The path to the output x file.
        file_geodatabase(Optional[str]): Path to the file geodatabase. If you use this option, in_drainage_line is the name of the stream network feature class. (WARNING: Not always stable with GDAL.)
    
    Example::
    
        from RAPIDpy.gis.muskingum import CreateMuskingumXFileFromDranageLine
        #------------------------------------------------------------------------------
        #main process
        #------------------------------------------------------------------------------
        if __name__ == "__main__":
            CreateMuskingumXFileFromDranageLine(in_drainage_line='/path/to/drainageline.shp',
                                                x_id='Musk_x',
                                                out_x_file='/path/to/x.csv',
                                                )
    """
    if file_geodatabase:
        gdb_driver = ogr.GetDriverByName("OpenFileGDB")
        ogr_file_geodatabase = gdb_driver.Open(file_geodatabase)
        ogr_drainage_line_shapefile_lyr = ogr_file_geodatabase.GetLayer(in_drainage_line)
    else:
        ogr_drainage_line_shapefile = ogr.Open(in_drainage_line)
        ogr_drainage_line_shapefile_lyr = ogr_drainage_line_shapefile.GetLayer()

    with open_csv(out_x_file,'w') as kfile:
        x_writer = csv_writer(kfile)
        for drainage_line_feature in ogr_drainage_line_shapefile_lyr:
            x_writer.writerow([drainage_line_feature.GetField(x_id)])    
Example #28
def download_monthlyData(request):
    """
    Get data for stations
    """

    get_data = request.GET

    try:
        codEstacion = get_data['stationcode']
        nomEstacion = get_data['stationname']
        nomCountry = get_data['countryname']

        dir_base = os.path.dirname(__file__)
        url = os.path.join(dir_base, 'public/Data',
                           codEstacion + '-MONTHLY.csv')
        '''
        with open(url) as csvfile:
            readCSV = csv.reader(csvfile, delimiter=',')
            readCSV.next()
            datesDischarge = []
            dataDischarge = []
            for row in readCSV:
                da = row[0]
                year = int(da[0:4])
                month = int(da[5:7])
                day = int(da[8:10])
                dat = row[1]
                dat = float(dat)
                if dat < 0:
                    dat = np.nan
                dat = str(dat)
                datesDischarge.append(dt.datetime(year, month, day))
                dataDischarge.append(dat)
        '''

        df = pd.read_csv(url, index_col=0)
        df.index = pd.to_datetime(df.index)
        df = df[df.iloc[:, 0] >= 0]

        datesDischarge = df.index.tolist()
        dataDischarge = df.iloc[:, 0].values

        pairs = [list(a) for a in zip(datesDischarge, dataDischarge)]

        response = HttpResponse(content_type='text/csv')

        response[
            'Content-Disposition'] = 'attachment; filename=monthly_data_{0}-{1}-{2}.csv'.format(
                nomEstacion, codEstacion, nomCountry)

        writer = csv_writer(response)

        writer.writerow(['datetime', 'streamflow (m3/s)'])

        for row_data in pairs:
            writer.writerow(row_data)

        return response

    except Exception as e:
        print(str(e))
        return JsonResponse(
            {'error': 'No observed data found for the selected station.'})
Example #29
def FlowlineToPoint(in_drainage_line,
                    river_id,
                    out_csv_file,
                    file_geodatabase=None):
    """
    Converts flowline feature to a list of centroid points with their rivid
    in EPSG:4326.

    Parameters
    ----------
    in_drainage_line: str
        Path to the stream network (i.e. Drainage Line) shapefile.
    river_id: str
        The name of the field with the river ID
        (Ex. 'HydroID', 'COMID', or 'LINKNO').
    out_csv_file: str
        Path to the output csv file with the centroid points.
    file_geodatabase: str, optional
        Path to the file geodatabase. If you use this option, in_drainage_line
         is the name of the stream network feature class
         (WARNING: Not always stable with GDAL).


    Example::

        from RAPIDpy.gis.centroid import FlowlineToPoint

        FlowlineToPoint(
            in_drainage_line='/path/to/drainageline.shp',
            river_id='LINKNO',
            out_csv_file='/path/to/comid_lat_lon_z.csv')

    """
    ogr_drainage_line_shapefile_lyr, ogr_drainage_line_shapefile = \
        open_shapefile(in_drainage_line, file_geodatabase)

    ogr_drainage_line_shapefile_lyr_proj = \
        ogr_drainage_line_shapefile_lyr.GetSpatialRef()
    osr_geographic_proj = osr.SpatialReference()
    osr_geographic_proj.ImportFromEPSG(4326)
    proj_transform = None
    if ogr_drainage_line_shapefile_lyr_proj != osr_geographic_proj:
        proj_transform = osr.CoordinateTransformation(
            ogr_drainage_line_shapefile_lyr_proj, osr_geographic_proj)

    # print valid field names to table
    with open_csv(out_csv_file, 'w') as outfile:
        writer = csv_writer(outfile)
        writer.writerow(['rivid', 'lat', 'lon', 'z'])
        for feature in ogr_drainage_line_shapefile_lyr:
            feat_geom = feature.GetGeometryRef()
            if proj_transform:
                feat_geom.Transform(proj_transform)
            centroid = feat_geom.Centroid()
            centroid_pt = centroid.GetPoint(0)
            writer.writerow([
                feature.GetField(river_id), centroid_pt[1], centroid_pt[0],
                centroid_pt[2]
            ])

    del ogr_drainage_line_shapefile
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

'''
Reads CNPJs and CPFs from standard input and writes a CSV to standard output
with the results of querying the 2008 campaign finance disclosures.
'''

import sys
from csv import writer as csv_writer

from tse.prestacao_de_contas import doador_2008

if __name__ == '__main__':
    csv = csv_writer(sys.stdout)
    csv.writerow(doador_2008.campos)

    for line in sys.stdin:
        cnpj_ou_cpf = line.strip()
        #sys.stderr.write('Searching %s...\n' % cnpj_ou_cpf)
        campos = doador_2008(cnpj_ou_cpf)
        if campos:
            csv.writerow(campos)


# vim:tabstop=4:expandtab:smartindent:encoding=utf8

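A hypothetical way to run the script above, assuming it is saved as doador_2008_lookup.py and that cnpjs.txt holds one CNPJ or CPF per line:

#   python doador_2008_lookup.py < cnpjs.txt > doacoes_2008.csv
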
def main(argv):
    ap = ArgumentParser(prog="generate-semantic-model")
    ap.add_argument("-v",
                    "--verbose",
                    default=False,
                    action="store_true",
                    help="Turn on verbose logging.")
    ap.add_argument("-i", "--initial-decays", default=5, type=int)
    ap.add_argument("-c", "--convergence-decays", default=2, type=int)
    ap.add_argument("-a", "--arc-epochs", default=3, type=int)
    ap.add_argument("-l", "--layers", default=2, type=int)
    ap.add_argument("-w", "--width", default=100, type=int)
    ap.add_argument("--word-input", default=False, action="store_true")
    ap.add_argument("-p", "--pre-existing", default=False, action="store_true")
    ap.add_argument("-m", "--monolith", default=False, action="store_true")
    ap.add_argument("--key-set", nargs="*", default=None)
    ap.add_argument("data_dir")
    ap.add_argument("sequential_dir")
    ap.add_argument("states_dir")
    ap.add_argument("encoding_dir")
    aargs = ap.parse_args(argv)
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0],
                  aargs.verbose, False, True, True)
    logging.debug(aargs)

    lstm = sequential.load_model(aargs.data_dir, aargs.sequential_dir, True)
    user_log.info("Sem")
    hyper_parameters = model.HyperParameters(aargs.layers, aargs.width)
    extra = {
        "word_input": aargs.word_input,
        "monolith": aargs.monolith,
    }

    if aargs.pre_existing:
        sem = load_sem(lstm, aargs.encoding_dir)
    else:
        sem = generate_sem(lstm, hyper_parameters, extra, aargs.states_dir,
                           aargs.arc_epochs, aargs.encoding_dir, aargs.key_set,
                           aargs.initial_decays, aargs.convergence_decays)

    keys_sem, total_sem = test_model(lstm, sem, aargs.states_dir, False,
                                     aargs.key_set)
    # TODO
    #user_log.info("Baseline")
    #baseline = generate_baseline(aargs.data_dir, lstm, hyper_parameters, extra)
    #scores_baseline, totals_baseline = test_model(lstm, baseline, aargs.states_dir, True, aargs.key_set)

    with open(os.path.join(aargs.encoding_dir, "analysis-breakdown.csv"),
              "w") as fh:
        writer = csv_writer(fh)
        writer.writerow(["technique", "key", "perplexity"])

        for key, perplexity in sorted(keys_sem.items()):
            writer.writerow(["sem", key, "%f" % perplexity])

        #for key, scores in sorted(scores_baseline.items()):
        #    for name, score in sorted(scores.items()):
        #        writer.writerow(["baseline", key, name, "%f" % score])

    with open(os.path.join(aargs.encoding_dir, "analysis-totals.csv"),
              "w") as fh:
        writer = csv_writer(fh)
        writer.writerow(["technique", "perplexity"])
        writer.writerow(["sem", "%f" % total_sem])

        #for name, score in sorted(totals_baseline.items()):
        #    writer.writerow(["baseline", name, "%f" % score])

    return 0
def main(argv):
    ap = ArgumentParser(prog="extract-perplexity")
    ap.add_argument("log_file")
    aargs = ap.parse_args(argv)
    name_series = {}
    maximum_epoch = 0

    with open(aargs.log_file, "r") as fh:
        epoch = None
        arc_epochs = None
        stored_validation = None
        stored_name = None

        for line in fh.readlines():
            name = matched_invocation(line)

            if name is not None:
                epoch = 0
                arc_epochs = 0
                stored_name = name
                name_series[stored_name] = {}

            if stored_name is not None:
                if matches_epoch(line):
                    arc_epochs += 1

                if matches_load(line):
                    arc_epochs = 0
                    stored_validation = None

                if matches_save(line):
                    epoch += arc_epochs
                    arc_epochs = 0

                    if epoch > maximum_epoch:
                        maximum_epoch = epoch

                    if stored_validation is not None:
                        name_series[stored_name][epoch] = (stored_validation,
                                                           None)
                        stored_validation = None

                validation = matched_validation(line)

                if validation is not None:
                    stored_validation = validation

                test = matched_test(line)

                if test is not None:
                    name_series[stored_name][epoch] = (
                        name_series[stored_name][epoch][0], test)

                total = matched_total(line)

                if total is not None:
                    if name_series[stored_name][epoch][1] is None:
                        name_series[stored_name][epoch] = (
                            name_series[stored_name][epoch][0], total)

                    stored_name = None

    header = ["epoch"]

    for name, series in sorted(name_series.items()):
        header += ["%s - Dev" % name, "%s - Test" % name]

    row_columns = []

    for epoch in range(maximum_epoch):
        if any([epoch in series for name, series in name_series.items()]):
            row = [epoch]

            for name, series in sorted(name_series.items()):
                if epoch in series:
                    values = series[epoch]
                    row += [
                        "%.4f" % values[0],
                        "" if values[1] is None else "%.4f" % values[1]
                    ]
                else:
                    row += ["", ""]

            row_columns += [row]

    writer = csv_writer(sys.stdout)
    writer.writerow(header)

    for row in row_columns:
        writer.writerow(row)

    return 0
Example #33
average_GFP = wf.average_qualifying_value_per_region(
    qualifying_GFP,
    in_channel=['pre_cell_labels', 'projected_GFP', 'qualifying_GFP'],
    out_channel=['average_GFP', 'average_GFP_pad'])

pre_render = examples.xi_support.xi_pre_render(
    average_GFP,
    in_channel=['name pattern', 'projected_GFP', 'qualifying_GFP',
                'pre_cell_labels', 'average_GFP_pad', 'projected_mCh',
                'mCherry', 'GFP', 'group id'],
    out_channel='_',
    save=True)

# since the resolution for the mitochondria is so much lower than for the yeast cells, we will
# have to perform a bit more straightforward cutting

with open('xi_analys_results.csv', 'wb') as output_file:
        writer = csv_writer(output_file)
        writer.writerow(['file', 'time', 'correlation coeff',
                         'median GFP', 'average GFP', 'linreg slope', 'linreg rvalue',
                         'linreg pvalue'])

prev_time = time()

for primary_namespace in pre_render:
    print '%s - analyzed %s - %s in %s' % (strftime('%X %x'),
                                           primary_namespace['name pattern'],
                                           primary_namespace['group id'],
                                           time() - prev_time)
    prev_time = time()
Example #34
def xi_pre_render(name_pattern, proj_gfp, qual_gfp, cell_labels, average_gfp_pad, proj_mch,
                  mch, gfp, timestamp,
                  save=False, directory_to_save_to='verification', mch_cutoff=0.2, slector_cutoff=0.1):

    plt.figure(figsize=(20, 15))

    plt.suptitle(name_pattern)

    main_ax = plt.subplot(231)
    plt.title('GFP')
    plt.imshow(proj_gfp, interpolation='nearest')
    plt.contour(cell_labels > 0, [0.5], colors='w')

    plt.subplot(232, sharex=main_ax, sharey=main_ax)
    plt.title('log-GFP')
    plt.imshow(np.log(proj_gfp + np.min(proj_gfp[proj_gfp > 0])), cmap='hot', interpolation='nearest')
    plt.contour(cell_labels > 0, [0.5], colors='w')

    plt.subplot(233, sharex=main_ax, sharey=main_ax)
    plt.title('raw segmentation')
    plt.imshow(qual_gfp, cmap='gray', interpolation='nearest')
    plt.contour(cell_labels > 0, [0.5], colors='w')

    ax = plt.subplot(234, sharex=main_ax, sharey=main_ax)
    plt.title('labeled segmentation')
    plt.imshow(cell_labels, cmap=plt.cm.spectral, interpolation='nearest')
    unique = np.unique(cell_labels)
    for i in unique:
        mask = cell_labels == i
        x, y = scipy.ndimage.measurements.center_of_mass(mask)
        ax.text(y-8, x+8, '%s' % i, fontsize=10)

    plt.subplot(235)
    selector = np.logical_and(mch > slector_cutoff, gfp > slector_cutoff)
    plt.title('mCh-GFP correlation - %s, qual GFP intensity: %s' %
              (np.corrcoef(mch[selector], gfp[selector])[0, 1], np.median(gfp[mch > mch_cutoff])))
    slope, intercept, rvalue, pvalue, stderr = linregress(mch[selector], gfp[selector])
    better2D_desisty_plot(mch[selector], gfp[selector])
    linarray = np.arange(0.1, 0.5, 0.05)
    plt.plot(linarray, intercept+slope*linarray, 'r')
    plt.xlabel('mCherry')
    plt.ylabel('GFP')

    plt.subplot(236, sharex=main_ax, sharey=main_ax)
    plt.title('mCherry')
    plt.imshow(proj_mch, interpolation='nearest')
    plt.contour(cell_labels > 0, [0.5], colors='w')

    with open('xi_analys_results.csv', 'ab') as output_file:
        writer = csv_writer(output_file)

        puck = [name_pattern, timestamp,
                np.corrcoef(mch[selector], gfp[selector])[0, 1],
                np.median(gfp[mch > mch_cutoff]), np.average(gfp[mch > mch_cutoff]),
                slope, rvalue, pvalue]
        writer.writerow(puck)

    if not save:
        plt.show()

    else:
        name_puck = directory_to_save_to+'/'+'xi_pre_render-'+timestamp+'-'+name_pattern+'.png'
        plt.savefig(name_puck)
        plt.close()
Example #35
    def write_flows_to_csv(self, path_to_output_file,
                           river_index=None, 
                           river_id=None,
                           date_search_start=None,
                           date_search_end=None,
                           daily=False,
                           mode="mean"):
        """
        Write out RAPID output to CSV file.
        
        .. note:: Requires either the *river_id* or the *river_index* parameter; either one can be used.

        Parameters:
            path_to_output_file(str): Path to the output csv file.
            river_index(Optional[int]): Index of the river in the file for which you want the streamflow.
            river_id(Optional[int]): The river ID for which you want the streamflow.
            date_search_start(Optional[datetime]): A datetime object giving the minimum date to start from.
            date_search_end(Optional[datetime]): A datetime object giving the maximum date to end at.
            daily(Optional[boolean]): If True and the file is CF-compliant, write out daily flows.
            mode(Optional[str]): You can get the daily average "mean" or the maximum "max". Default is "mean".

        Example writing entire time series to file:
        
        .. code:: python
        
            from RAPIDpy import RAPIDDataset
    
            river_id = 3624735
            path_to_rapid_qout = '/path/to/Qout.nc'

            with RAPIDDataset(path_to_rapid_qout) as qout_nc:
                #for writing entire time series to file
                qout_nc.write_flows_to_csv('/timeseries/Qout_3624735.csv', 
                                           river_id=river_id,
                                           )
                                      
                
                #if file is CF compliant, you can write out daily average

                #NOTE: Getting the river index is not necessary
                #this is just an example of how to use this                                      
                river_index = qout_nc.get_river_index(river_id)
                qout_nc.write_flows_to_csv('/timeseries/Qout_daily.csv',
                                           river_index=river_index,
                                           daily=True,
                                           )
                                           
        Example writing entire time series as daily average to file:
        
        .. code:: python
        
            from RAPIDpy import RAPIDDataset
    
            river_id = 3624735
            path_to_rapid_qout = '/path/to/Qout.nc'

            with RAPIDDataset(path_to_rapid_qout) as qout_nc:
                #NOTE: Getting the river index is not necessary
                #this is just an example of how to use this                                      
                river_index = qout_nc.get_river_index(river_id)
                
                #if file is CF compliant, you can write out daily average
                qout_nc.write_flows_to_csv('/timeseries/Qout_daily.csv',
                                           river_index=river_index,
                                           daily=True,
                                           )

        Example writing entire time series as daily average to file:
        
        .. code:: python
        
            from datetime import datetime
            from RAPIDpy import RAPIDDataset
    
            river_id = 3624735
            path_to_rapid_qout = '/path/to/Qout.nc'

            with RAPIDDataset(path_to_rapid_qout) as qout_nc:
                #if file is CF compliant, you can filter by date
                qout_nc.write_flows_to_csv('/timeseries/Qout_daily_date_filter.csv',
                                           river_id=river_id,
                                           daily=True,
                                           date_search_start=datetime(2002, 8, 31),
                                           date_search_end=datetime(2002, 9, 15),
                                           mode="max"
                                           )        
        """
        if river_id is not None:
            river_index = self.get_river_index(river_id)
        elif river_index is None:
            raise Exception("ERROR: Need river id or river index ...")

        #analyze and write
        if self.is_time_variable_valid() or self._is_legacy_time_valid():
            time_index_range = self.get_time_index_range(date_search_start=date_search_start,
                                                         date_search_end=date_search_end)
                                                         
            qout_arr = self.get_qout_index(river_index, time_index_array=time_index_range)
            time_array = self.get_time_array(time_index_array=time_index_range, return_datetime=True)
            
            df = pd.DataFrame(qout_arr.T, index=time_array)

            if daily:
                df = df.resample('D')
                if mode == "mean":
                    df = df.mean()
                elif mode == "max":
                    df = df.max()
                else:
                    raise Exception("Invalid mode ...")
                
            df.to_csv(path_to_output_file, header=False)

        else:
            print("Valid time variable not found. Printing values only ...")
            qout_arr = self.get_qout_index(river_index)
            with open_csv(path_to_output_file, 'w') as outcsv:
                writer = csv_writer(outcsv)
                for index in xrange(len(qout_arr)):
                    writer.writerow([index, "{0:.5f}".format(qout_arr[index])])
Example #36
def CreateMuskingumKfacFile(in_drainage_line,
                            river_id,
                            length_id,
                            slope_id,
                            celerity,
                            formula_type,
                            in_connectivity_file,
                            out_kfac_file,
                            length_units="km",
                            slope_percentage=False,
                            file_geodatabase=None):
    r"""
    Creates the Kfac file for calibration.

    The improved methods using slope to generate values
    for Kfac were used here:

    Tavakoly, A. A., A. D. Snow, C. H. David, M. L. Follum, D. R. Maidment,
    and Z.-L. Yang, (2016) "Continental-Scale River Flow Modeling of the
    Mississippi River Basin Using High-Resolution NHDPlus Dataset",
    Journal of the American Water Resources Association (JAWRA) 1-22.
    DOI: 10.1111/1752-1688.12456

    Formula Type Options:

    1. :math:`Kfac_n = \frac{RiverLength_n}{Celerity_n}`
    2. :math:`Kfac_n = \eta*\frac{RiverLength_n}{\sqrt{RiverSlope_n}}`
    3. :math:`Kfac_n = \eta*\frac{RiverLength_n}{\sqrt{RiverSlope_n}}\left[0.05, 0.95\right]`


    Where:

    :math:`a = \frac{\sum_{n=1}^{r} \frac{RiverLength_n}{Celerity_n}}{r}`

    :math:`b = \frac{\sum_{n=1}^{r} \frac{RiverLength_n}{\sqrt{RiverSlope_n}}}{r}`

    :math:`\eta = \frac{a}{b}`

    r = Number of river segments.


    Parameters
    ----------
    in_drainage_line: str
        Path to the stream network (i.e. Drainage Line) shapefile.
    river_id: str
        The name of the field with the river ID
        (Ex. 'HydroID', 'COMID', or 'LINKNO').
    length_id: str
        The field name containing the length of the river segment
        (Ex. 'LENGTHKM' or 'Length').
    slope_id: str
        The field name containing the slope of the river segment
        (Ex. 'Avg_Slope' or 'Slope').
    celerity: float
        The flow wave celerity for the watershed in meters per second.
        1 km/hr or 1000.0/3600.0 m/s is a reasonable value if unknown.
    formula_type: int
        An integer representing the formula type to use when calculating kfac.
    in_connectivity_file: str
        The path to the RAPID connectivity file.
    out_kfac_file: str
        The path to the output kfac file.
    length_units: str, optional
        The units for the length_id field. Supported types are "m" for meters
        and "km" for kilometers.
    slope_percentage: bool, optional
        If True, it assumes the slope given is in percentage and will
        divide by 100. Default is False.
    file_geodatabase: str, optional
        Path to the file geodatabase. If you use this option, in_drainage_line
         is the name of the stream network feature class
         (WARNING: Not always stable with GDAL).


    Example::

        from RAPIDpy.gis.muskingum import CreateMuskingumKfacFile

        CreateMuskingumKfacFile(
            in_drainage_line='/path/to/drainageline.shp',
            river_id='LINKNO',
            length_id='Length',
            slope_id='Slope',
            celerity=1000.0/3600.0,
            formula_type=3,
            in_connectivity_file='/path/to/rapid_connect.csv',
            out_kfac_file='/path/to/kfac.csv',
            length_units="m",
        )
    """  # noqa
    ogr_drainage_line_shapefile_lyr, ogr_drainage_line_shapefile = \
        open_shapefile(in_drainage_line, file_geodatabase)

    number_of_features = ogr_drainage_line_shapefile_lyr.GetFeatureCount()
    river_id_list = np.zeros(number_of_features, dtype=np.int32)

    length_list = \
        np.zeros(number_of_features, dtype=np.float32)
    slope_list = np.zeros(number_of_features, dtype=np.float32)
    for feature_idx, drainage_line_feature in \
            enumerate(ogr_drainage_line_shapefile_lyr):
        river_id_list[feature_idx] = drainage_line_feature.GetField(river_id)
        length = drainage_line_feature.GetField(length_id)
        if length is not None:
            length_list[feature_idx] = length
        slope = drainage_line_feature.GetField(slope_id)
        if slope is not None:
            slope_list[feature_idx] = slope

    del ogr_drainage_line_shapefile

    if slope_percentage:
        slope_list /= 100.0

    if length_units == "m":
        length_list /= 1000.0
    elif length_units != "km":
        raise Exception("Invalid length units supplied. "
                        "Supported units are m and km.")

    connectivity_table = np.loadtxt(in_connectivity_file,
                                    delimiter=",",
                                    ndmin=2,
                                    dtype=int)

    length_slope_array = []
    kfac2_array = []
    if formula_type == 1:
        log("River Length/Celerity")
    elif formula_type == 2:
        log("Eta*River Length/Sqrt(River Slope)")
    elif formula_type == 3:
        log("Eta*River Length/Sqrt(River Slope) [0.05, 0.95]")
    else:
        raise Exception("Invalid formula type. Valid range: 1-3 ...")

    with open_csv(out_kfac_file, 'w') as kfacfile:
        kfac_writer = csv_writer(kfacfile)
        for row in connectivity_table:
            stream_id = int(float(row[0]))

            stream_id_index = river_id_list == stream_id
            # find the length
            stream_length = length_list[stream_id_index] * 1000.0

            if formula_type >= 2:
                # find the slope
                stream_slope = slope_list[stream_id_index]

                if stream_slope <= 0:
                    # if no slope, take average of upstream
                    # and downstream to get it
                    next_down_id = int(float(row[1]))
                    next_down_slope = 0
                    try:
                        next_down_index = \
                            np.where(river_id_list == next_down_id)[0][0]
                        next_down_slope = slope_list[next_down_index]
                    except IndexError:
                        pass

                    next_up_id = int(float(row[3]))
                    next_up_slope = 0
                    try:
                        next_up_index = \
                            np.where(river_id_list == next_up_id)[0][0]
                        next_up_slope = slope_list[next_up_index]
                    except IndexError:
                        pass

                    stream_slope = (next_down_slope + next_up_slope) / 2.0
                    if stream_slope <= 0:
                        # if still no slope, set to 0.001
                        stream_slope = 0.001

                length_slope_array.append(stream_length / stream_slope**0.5)
                kfac2_array.append(stream_length / celerity)
            else:
                kfac = stream_length / celerity
                kfac_writer.writerow(kfac)

        if formula_type >= 2:
            if formula_type == 3:
                log("Filtering Data by 5th and 95th Percentiles ...")
                length_slope_array = np.array(length_slope_array)
                percentile_5 = np.percentile(length_slope_array, 5)
                percentile_95 = np.percentile(length_slope_array, 95)

                length_slope_array[length_slope_array < percentile_5] = \
                    percentile_5
                length_slope_array[length_slope_array > percentile_95] = \
                    percentile_95

            eta = np.mean(kfac2_array) / np.mean(length_slope_array)
            log("Kfac2_Avg {0}".format(np.mean(kfac2_array)))
            log("Length_Slope Avg {0}".format(np.mean(length_slope_array)))
            log("Eta {0}".format(eta))
            log("Writing Data ...")
            for len_slope in length_slope_array:
                kfac_writer.writerow(eta * len_slope)
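A small numeric illustration of the Eta scaling described in the docstring above; the lengths, slopes and celerity are made-up values, not taken from any real watershed:

import numpy as np

lengths_m = np.array([1200.0, 800.0, 2500.0])   # river segment lengths in meters
slopes = np.array([0.004, 0.010, 0.002])        # dimensionless river slopes
celerity = 1000.0 / 3600.0                      # 1 km/hr expressed in m/s

kfac1 = lengths_m / celerity                    # formula type 1
length_slope = lengths_m / np.sqrt(slopes)
eta = kfac1.mean() / length_slope.mean()
kfac2 = eta * length_slope                      # formula types 2/3, before the
                                                # 5th/95th percentile clipping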
Example #37
def CreateMuskingumKfacFile(in_drainage_line,
                            river_id,
                            length_id,
                            slope_id,
                            celerity,
                            formula_type,
                            in_connectivity_file,
                            out_kfac_file,
                            length_units="km",
                            slope_percentage=False,
                            file_geodatabase=None):
    """
    Creates the Kfac file for calibration.

    Formula Type Options:

    1. River Length/Celerity
    2. Eta*River Length/Sqrt(River Slope)
    3. Eta*River Length/Sqrt(River Slope) [0.05, 0.95]

    Where Eta = Average(River Length/Celerity of all rivers) / Average(River Length/Sqrt(River Slope) of all rivers)
    
    Args:
        in_drainage_line(str): Path to the stream network (i.e. Drainage Line) shapefile.
        river_id(str): The name of the field with the river ID (Ex. 'HydroID', 'COMID', or 'LINKNO').
        length_id(str): The field name containing the length of the river segment (Ex. 'LENGTHKM' or 'Length').
        slope_id(str): The field name containing the slope of the river segment (Ex. 'Avg_Slope' or 'Slope').
        celerity(float): The flow wave celerity for the watershed in meters per second. 1 km/hr or 1000.0/3600.0 m/s is a reasonable value if unknown.
        formula_type(int): An integer representing the formula type to use when calculating kfac. 
        in_connectivity_file(str): The path to the RAPID connectivity file.
        out_kfac_file(str): The path to the output kfac file.
        length_units(Optional[str]): The units for the length_id field. Supported types are "m" for meters and "km" for kilometers.
        slope_percentage(Optional[bool]): If True, it assumes the slope given is in percentage and will divide by 100. Default is False.
        file_geodatabase(Optional[str]): Path to the file geodatabase. If you use this option, in_drainage_line is the name of the stream network feature class. (WARNING: Not always stable with GDAL.)
    
    Example::
    
        from RAPIDpy.gis.muskingum import CreateMuskingumKfacFile
        #------------------------------------------------------------------------------
        #main process
        #------------------------------------------------------------------------------
        if __name__ == "__main__":
            CreateMuskingumKfacFile(in_drainage_line='/path/to/drainageline.shp',
                                    river_id='LINKNO',
                                    length_id='Length',
                                    slope_id='Slope',
                                    celerity=1000.0/3600.0,
                                    formula_type=3,
                                    in_connectivity_file='/path/to/rapid_connect.csv',
                                    out_kfac_file='/path/to/kfac.csv',
                                    length_units="m",
                                    )    
    """
    if file_geodatabase:
        gdb_driver = ogr.GetDriverByName("OpenFileGDB")
        ogr_file_geodatabase = gdb_driver.Open(file_geodatabase)
        ogr_drainage_line_shapefile_lyr = ogr_file_geodatabase.GetLayer(
            in_drainage_line)
    else:
        ogr_drainage_line_shapefile = ogr.Open(in_drainage_line)
        ogr_drainage_line_shapefile_lyr = ogr_drainage_line_shapefile.GetLayer(
        )

    number_of_features = ogr_drainage_line_shapefile_lyr.GetFeatureCount()
    river_id_list = np.zeros(number_of_features, dtype=np.int32)
    length_list = np.zeros(number_of_features, dtype=np.float32)
    slope_list = np.zeros(number_of_features, dtype=np.float32)
    for feature_idx, drainage_line_feature in enumerate(
            ogr_drainage_line_shapefile_lyr):
        river_id_list[feature_idx] = drainage_line_feature.GetField(river_id)
        length = drainage_line_feature.GetField(length_id)
        if length is not None:
            length_list[feature_idx] = length
        slope = drainage_line_feature.GetField(slope_id)
        if slope is not None:
            slope_list[feature_idx] = slope

    if slope_percentage:
        slope_list /= 100.0

    if length_units == "m":
        length_list /= 1000.0
    elif length_units != "km":
        raise Exception(
            "ERROR: Invalid length units supplied. Supported units are m and km."
        )

    connectivity_table = np.loadtxt(in_connectivity_file,
                                    delimiter=",",
                                    ndmin=2,
                                    dtype=int)

    length_slope_array = []
    kfac2_array = []
    if formula_type == 1:
        print("River Length/Celerity")
    elif formula_type == 2:
        print("Eta*River Length/Sqrt(River Slope)")
    elif formula_type == 3:
        print("Eta*River Length/Sqrt(River Slope) [0.05, 0.95]")
    else:
        raise Exception("Invalid formula type. Valid range: 1-3 ...")

    with open_csv(out_kfac_file, 'w') as kfacfile:
        kfac_writer = csv_writer(kfacfile)
        for row in connectivity_table:
            streamID = int(float(row[0]))

            streamIDindex = river_id_list == streamID
            # find the length
            stream_length = length_list[streamIDindex] * 1000.0

            if formula_type >= 2:
                # find the slope
                stream_slope = slope_list[streamIDindex]

                if stream_slope <= 0:
                    #if no slope, take average of upstream and downstream to get it
                    nextDownID = int(float(row[1]))
                    next_down_slope = 0
                    try:
                        next_down_index = np.where(
                            river_id_list == nextDownID)[0][0]
                        next_down_slope = slope_list[next_down_index]
                    except IndexError:
                        pass

                    nextUpID = int(float(row[3]))
                    next_up_slope = 0
                    try:
                        next_up_index = np.where(
                            river_id_list == nextUpID)[0][0]
                        next_up_slope = slope_list[next_up_index]
                    except IndexError:
                        pass

                    stream_slope = (next_down_slope + next_up_slope) / 2.0
                    if stream_slope <= 0:
                        #if still no slope, set to 0.001
                        stream_slope = 0.001

                length_slope_array.append(stream_length / stream_slope**0.5)
                kfac2_array.append(stream_length / celerity)
            else:
                kfac = stream_length / celerity
                kfac_writer.writerow(kfac)

        if formula_type >= 2:
            if formula_type == 3:
                print("Filtering Data by 5th and 95th Percentiles ...")
                length_slope_array = np.array(length_slope_array)
                percentile_5 = np.percentile(length_slope_array, 5)
                percentile_95 = np.percentile(length_slope_array, 95)

                length_slope_array[
                    length_slope_array < percentile_5] = percentile_5
                length_slope_array[
                    length_slope_array > percentile_95] = percentile_95

            eta = np.mean(kfac2_array) / np.mean(length_slope_array)
            print("Kfac2_Avg {0}".format(np.mean(kfac2_array)))
            print("Length_Slope Avg {0}".format(np.mean(length_slope_array)))
            print("Eta {0}".format(eta))
            print("Writing Data ...")
            for len_slope in length_slope_array:
                kfac_writer.writerow(eta * len_slope)
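The eta scaling and the formula-type-3 percentile clipping can be checked in isolation; a minimal NumPy sketch with made-up lengths, slopes, and celerity (np.clip stands in for the percentile assignments above):

import numpy as np

# Hypothetical per-segment values; real ones come from the drainage line shapefile.
stream_lengths_m = np.array([1200.0, 800.0, 2500.0, 600.0])  # meters
stream_slopes = np.array([0.004, 0.010, 0.002, 0.020])       # m/m
celerity = 1000.0 / 3600.0                                   # 1 km/hr in m/s

length_slope = stream_lengths_m / np.sqrt(stream_slopes)     # formula type 2/3 terms
kfac2 = stream_lengths_m / celerity                          # formula type 1 values

# Formula type 3: clip the length/slope terms to the 5th-95th percentile range.
p5, p95 = np.percentile(length_slope, 5), np.percentile(length_slope, 95)
length_slope = np.clip(length_slope, p5, p95)

eta = np.mean(kfac2) / np.mean(length_slope)
kfac = eta * length_slope                                    # one kfac value per segment
print(eta, kfac)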
Beispiel #38
0
        clean_dept = departments.clean(current_dept)
        new_rows.append(
            [clean_dept, class_match['id'], class_match['name'], total])
    elif label == 'Total':
        # When 'Total' row is reached, reset current_dept
        current_dept = ''
    else:
        # Some departments span multiple lines
        if current_dept:
            current_dept = current_dept + ' ' + label.strip()
        else:
            current_dept = label.strip()

# Add year and fund columns to each row
new_rows = [['2017', 'General Fund'] + row for row in new_rows]

# Group rows by everything but total and aggregate the total (sum)
new_rows = aggregate_similar_rows(new_rows, 5)

# Sort rows for idempotency
new_rows.sort()

header = ['Fiscal Year', 'Fund', 'Department', 'Class ID', 'Class', 'Total']

with open(OUTPUT_FILE_PATH, 'wb') as f:
    writer = csv_writer(f)
    writer.writerow(header)
    writer.writerows(new_rows)

print('Wrote {0} rows to {1}'.format(len(new_rows), OUTPUT_FILE_PATH))
Beispiel #39
0
def run(
    dir_MRI="data/ALFA_PET",
    dir_PET="data/ALFA_PET",
    dir_RR="data/Atlas/CL_2mm",
    outfile="data/ALFA_PET/Quant_realigned.csv",
    glob_PET="*_PET.nii.gz",
    glob_MRI="*_MRI.nii.gz",
):
    """
    Args:
      dir_MRI (str or Path): MRI directory
      dir_PET (str or Path): PET directory
      dir_RR (str or Path): Reference regions ROIs directory
        (standard Centiloid RR from GAAIN Centioid website: 2mm, nifti)
      outfile (str or Path): Output quantification file
    Returns:
      fname (list[str])
      greyCerebellum (list[float])
      wholeCerebellum (list[float])
      wholeCerebellumBrainStem (list[float])
      pons (list[float])
    """
    # PET & MR images lists
    s_PET_dir = list(tmap(gunzip, Path(dir_PET).glob(glob_PET), leave=False))
    s_MRI_dir = list(tmap(gunzip, Path(dir_MRI).glob(glob_MRI), leave=False))
    if len(s_PET_dir) != len(s_MRI_dir):
        raise IndexError("Different number of PET and MR images")

    eng = get_matlab()
    dir_spm = fspath(Path(eng.which("spm")).parent)

    for d_PET, d_MRI in tzip(s_PET_dir, s_MRI_dir):
        with tic("Step 0: Reorient PET subject"):
            eng.f_acpcReorientation(d_PET, nargout=0)

        with tic("Step 0: Reorient MRI subject"):
            eng.f_acpcReorientation(d_MRI, nargout=0)

        with tic("Step 1: CorregisterEstimate"):
            eng.f_1CorregisterEstimate(d_MRI, dir_spm, nargout=0)
        # Check Reg

        with tic("Step 2: CorregisterEstimate"):
            eng.f_2CorregisterEstimate(d_MRI, d_PET, nargout=0)
        # Check Reg

        with tic("Step 3: Segment"):
            eng.f_3Segment(d_MRI, dir_spm, nargout=0)

        with tic("Step 4: Normalise"):
            d_file_norm = fspath(
                Path(d_MRI).parent / ("y_" + Path(d_MRI).name))
            eng.f_4Normalise(d_file_norm, d_MRI, d_PET, nargout=0)

    s_PET = list(
        map(
            fspath,
            Path(dir_PET).glob("w" + (glob_PET[:-3] if glob_PET.lower().
                                      endswith(".gz") else glob_PET))))
    res = eng.f_Quant_centiloid(s_PET, fspath(dir_RR), nargout=5)
    if outfile:
        with open(outfile, "w") as fd:
            f = csv_writer(fd)
            f.writerow(("Fname", "GreyCerebellum", "WholeCerebellum",
                        "WholeCerebellumBrainStem", "Pons"))
            f.writerows(zip(*res))
    return res
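The CSV write at the end relies on zip(*res) transposing the five parallel lists returned by f_Quant_centiloid into one row per subject; a small sketch with dummy values:

import csv
import io

# Dummy result in the same shape as f_Quant_centiloid's five outputs.
res = (["subj01", "subj02"],   # Fname
       [1.21, 1.05],           # GreyCerebellum
       [1.10, 0.98],           # WholeCerebellum
       [1.08, 0.95],           # WholeCerebellumBrainStem
       [0.87, 0.80])           # Pons

buf = io.StringIO()
w = csv.writer(buf)
w.writerow(("Fname", "GreyCerebellum", "WholeCerebellum",
            "WholeCerebellumBrainStem", "Pons"))
w.writerows(zip(*res))         # each output row covers one subject across the five lists
print(buf.getvalue())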
Beispiel #40
0
def check_logfile(fname, write_csv=False, nback=0, write_datetime=False):
    """
    run over a Cheetah logfile and analyzed reference settings etc
    """
    _, protocol, _ = parser(fname)
    base_name = os.path.splitext(os.path.basename(fname))[0]
    all_settings = analyze_drs(protocol)

    for i_setting, setting in enumerate(all_settings):
        print()
        if setting.folder is None:
            msg = 'Warning: Recording Stop -> Start without folder change!'
        else:
            msg = setting.folder

        print('Start: {} ({})'.format(setting.start_rec[1],
                                      setting.start_timestamp))
        print('Stop: {} ({})'.format(setting.stop_rec[1],
                                     setting.stop_timestamp))
        # print('Duration: {} min'.
        #      format((setting.stop_rec[1] - setting.start_rec[1])))
        out_str = create_rep(setting.num2name, setting.name2num, setting.crefs,
                             setting.lrefs, setting.grefs)
    if write_csv:
        setting = all_settings[-nback - 1]

        if setting.folder is None:
            msg = 'Warning: Recording Stop -> Start without folder change!'
        else:
            msg = setting.folder

        out_str = create_rep(setting.num2name, setting.name2num, setting.crefs,
                             setting.lrefs, setting.grefs)
        outfname = base_name + '_{:02d}.csv'.\
            format(len(all_settings) - nback - 1)
        with open(outfname, 'w') as outf:
            outf.write('# {} {} {}\n'.format(msg, setting.start_rec[1],
                                             setting.stop_rec[1]))
            csvwriter = csv_writer(outf)
            for line in out_str:
                csvwriter.writerow(line)

    if write_datetime:
        setting = all_settings[-nback - 1]
        date, start, stop = parse_times(setting)
        print(date, start, stop)
        if date is None:
            out = '# Date not guessed because Recording was stopped'\
                  ' and re-started without folder change!\n'

        else:
            out = '# {}\ncreate_folder {}\n'.\
                   format(setting.folder, date.strftime('%Y-%m-%d %H:%M:%S'))

        start_ts = setting.start_timestamp
        stop_ts = setting.stop_timestamp

        for name, d, t in (('start', start, start_ts), ('stop', stop,
                                                        stop_ts)):
            out += name + '_recording {} {} {}\n'.\
                   format(d.date().isoformat(), d.time().isoformat(), t)

        diff_time = (stop_ts - start_ts) / 1e6 - (stop - start).seconds

        out += 'cheetah_ahead: {}\n'.format(diff_time)

        if os.path.exists(DATE_FNAME):
            print('{} exists, not overwriting!'.format(DATE_FNAME))
        else:
            with open(DATE_FNAME, 'w') as fid:
                fid.write(out)
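The nback argument counts settings backwards from the most recent one via all_settings[-nback - 1]; a quick indexing sketch with a hypothetical list:

all_settings = ["oldest", "middle", "latest"]   # hypothetical settings
assert all_settings[-0 - 1] == "latest"         # nback=0 -> most recent setting
assert all_settings[-1 - 1] == "middle"         # nback=1 -> one before that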
Beispiel #41
0
def akshay_summarize(name_pattern, group_by, av_nuc_p53, av_en_p53, av_nuc_p21, av_en_p21, output):
    with open(output, 'ab') as output_file:
        writer = csv_writer(output_file)
        for i, nuc_pac in enumerate(zip(av_nuc_p53, av_en_p53, av_nuc_p21, av_en_p21)):
            writer.writerow([name_pattern, group_by, i, nuc_pac[0], nuc_pac[1], nuc_pac[2], nuc_pac[3]])
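open(output, 'ab') is the Python 2 idiom for appending CSV rows; under Python 3 the csv module expects text mode with newline='', so a rough equivalent of the function above would be (a sketch, not the original code):

from csv import writer as csv_writer

def akshay_summarize_py3(name_pattern, group_by, av_nuc_p53, av_en_p53, av_nuc_p21, av_en_p21, output):
    # Same row layout as above, but opened in text append mode for Python 3's csv module.
    with open(output, 'a', newline='') as output_file:
        writer = csv_writer(output_file)
        for i, nuc_pac in enumerate(zip(av_nuc_p53, av_en_p53, av_nuc_p21, av_en_p21)):
            writer.writerow([name_pattern, group_by, i, nuc_pac[0], nuc_pac[1], nuc_pac[2], nuc_pac[3]])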
Beispiel #42
0
def _get_default_csv(self, open_file):
    # quoting=csv.QUOTE_MINIMAL - default
    return csv_writer(open_file, **self.csv_params)
Beispiel #43
0
def Kristen_render(name_pattern,
                   group_id,
                   mCherry,
                   extranuclear_mCherry_pad,
                   GFP_orig,
                   mCherry_orig, output,
                   save=False, directory_to_save_to='verification'):
    labels, _ = ndi.label(extranuclear_mCherry_pad)
    unique_segmented_cells_labels = np.unique(labels)[1:]
    mCherry_cutoff = np.zeros_like(mCherry)
    qualifying_cell_label = []
    qualifying_regression_stats = []

    for cell_label in unique_segmented_cells_labels:
        mCherry_2 = np.zeros_like(mCherry)
        my_mask = labels == cell_label
        average_apply_mask = np.mean(mCherry[my_mask])
        intensity = np.sum(mCherry[my_mask])
        binary_pad = np.zeros_like(mCherry)
        binary_pad[my_mask] = 1
        pixel = np.sum(binary_pad[my_mask])

        if (average_apply_mask > .05 or intensity > 300) and pixel > 4000:

            GFP_limited_to_cell_mask = cf._3d_stack_2d_filter(GFP_orig, my_mask)
            mCherry_limited_to_cell_mask = cf._3d_stack_2d_filter(mCherry_orig, my_mask)

            qualifying_3d_GFP = GFP_limited_to_cell_mask[mCherry_limited_to_cell_mask>50]
            average_3d_GFP = np.mean(qualifying_3d_GFP)
            median_3d_GFP = np.median(qualifying_3d_GFP)
            std_3d_GFP = np.std(qualifying_3d_GFP)
            sum_qualifying_GFP = np.sum(qualifying_3d_GFP)

            nonqualifying_3d_GFP = GFP_limited_to_cell_mask[mCherry_limited_to_cell_mask<=50]
            average_nonqualifying_3d_GFP = np.mean(nonqualifying_3d_GFP)
            median_nonqualifying_3d_GFP = np.median(nonqualifying_3d_GFP)
            std_nonqualifying_3d_GFP = np.std(nonqualifying_3d_GFP)
            sum_nonqualifying_GFP = np.sum(nonqualifying_3d_GFP)

            sum_total_GFP = sum_qualifying_GFP + sum_nonqualifying_GFP
            percent_qualifying_over_total_GFP = sum_qualifying_GFP/sum_total_GFP
            # report the percentage too or sums are sufficient?

            GFP_orig_qualifying = cf._3d_stack_2d_filter(GFP_orig, my_mask)
            mCherry_orig_qualifying = cf._3d_stack_2d_filter(mCherry_orig, my_mask)
            mCherry_1d = mCherry_orig_qualifying[mCherry_orig_qualifying > 50]
            GFP_1d = GFP_orig_qualifying[mCherry_orig_qualifying>50]
            regression_results = stats.linregress(GFP_1d, mCherry_1d)

            mCherry_2[my_mask] = mCherry[my_mask]
            mCherry_cutoff[my_mask] = mCherry[my_mask]
            qualifying_cell_label.append(cell_label)
            qualifying_regression_stats.append((regression_results[0], regression_results[2], regression_results[3]))

            name_pattern_split = name_pattern.split(' - ')
            transfection_label = name_pattern_split[0]
            cell_type = name_pattern_split[1]
            exp_time = name_pattern_split[2]
            image_number = name_pattern_split[4]

            with open(output, 'ab') as output_file:
                writer = csv_writer(output_file, delimiter='\t')
                writer.writerow([transfection_label, cell_type, exp_time, image_number, cell_label, sum_qualifying_GFP, sum_total_GFP, average_3d_GFP, median_3d_GFP, std_3d_GFP, average_nonqualifying_3d_GFP, median_nonqualifying_3d_GFP, std_nonqualifying_3d_GFP, regression_results[0], regression_results[2], regression_results[3]])

            plt.figure(figsize=(26.0, 15.0))
            plt.title('Kristen\'s Data')
            plt.suptitle(name_pattern)

            main_ax = plt.subplot(221)
            plt.subplot(221, sharex=main_ax, sharey=main_ax)
            plt.title('mCherry Binary')
            im = plt.imshow(extranuclear_mCherry_pad, interpolation='nearest', cmap = 'hot')
            plt.colorbar(im)
            plt.subplot(222, sharex=main_ax, sharey=main_ax)
            plt.title('mCherry')
            plt.imshow(mCherry, interpolation='nearest')
            plt.contour(extranuclear_mCherry_pad, [0.5], colors='k')
            plt.subplot(223)
            dplt.better2D_desisty_plot(GFP_1d, mCherry_1d)
            plt.title('mCherry Intensity as a Function of GFP Voxel')
            plt.xlabel('GFP Voxel')
            plt.ylabel('mCherry Intensity')
            plt.subplot(224, sharex=main_ax, sharey=main_ax)
            plt.title('mCherry-cutoff applied')
            plt.imshow(mCherry_2, interpolation='nearest')

            if not save:
                plt.show()

            else:
                name_puck = directory_to_save_to + '/' + 'Kristen-' + name_pattern+ '_cell' + str(cell_label)+ '.png'
                plt.savefig(name_puck)
                plt.close()
    plt.figure(figsize=(26.0, 15.0))
    main_ax = plt.subplot(121)
    plt.subplot(121, sharex=main_ax, sharey=main_ax)
    plt.suptitle('mCherry Before and After Qualifying Cell Cutoff is Applied')
    plt.title('mCherry')
    im = plt.imshow(mCherry, interpolation='nearest')
    plt.colorbar(im)
    plt.subplot(122, sharex=main_ax, sharey=main_ax)
    plt.title('mCherry')
    plt.imshow(mCherry_cutoff, interpolation='nearest')
    if not save:
        plt.show()

    else:
        name_puck = directory_to_save_to + '/' + 'Kristen-' + name_pattern + 'cutoff_app' + '.png'
        plt.savefig(name_puck)
        plt.close()

    return qualifying_regression_stats
Beispiel #44
0
    def write_results_csv_exploits_to_cve(
        self,
        results_to_write: list or dict,
        dest_dir: str,
        csv_file: str,
        hosts_results: dict,
        csv_dir=DefaultValues.CSV_RESULTS_DIRECTORY,
    ) -> None:
        """
        TL;DR: This function matches exploits to appropriate CVEs.
        In more detail: it first collects every product associated with a
        particular CVE (for example, CVE-2014-0160 with products such as
        "OpenSSL, Apache, Nginx"). Once all products are collected, exploits
        can be matched to these CVEs and their products. The final results
        look like:
        "CVE #1, List of products, Exploit #1, description"
        "CVE #1, List of products, Exploit #2, description"
        etc.
        :param results_to_write: this is CVE/Exploits collections with definitions
        :param dest_dir: destination dir to write results
        :param csv_file: file to save results
        :param hosts_results: results about all the scanned hosts
        :param csv_dir: directory to save CSVs
        :return: None
        """
        if not results_to_write:
            return
        vulnerabilities_mapping = {}
        for host, info in hosts_results.items():
            if not info.get("vulnerabilities"):
                continue
            for vulnerabilities_db, vulnerabilities_info in info.get(
                    "vulnerabilities").items():
                if not vulnerabilities_info:
                    continue
                list_of_vulns = vulnerabilities_info.keys()
                for vulnerability in list_of_vulns:
                    if vulnerabilities_mapping.get(vulnerability):
                        if (info.get("product")
                                not in vulnerabilities_mapping[vulnerability]):
                            vulnerabilities_mapping[vulnerability].append(
                                info.get("product"))
                    else:
                        vulnerabilities_mapping.update(
                            {vulnerability: [info.get("product")]})

        path_to_csv_file = Path(".").joinpath(dest_dir).joinpath(csv_dir)
        path_to_csv_file.mkdir(parents=True, exist_ok=True)
        path_to_csv_file = path_to_csv_file.joinpath(csv_file)
        with open(path_to_csv_file, mode="w", newline="") as result_csv_file:
            _writer = csv_writer(result_csv_file,
                                 delimiter=",",
                                 quotechar='"',
                                 quoting=QUOTE_ALL)
            _writer.writerow([
                "CVE with exploit",
                "Affected Products",
                "Exploit title",
                "Bulletin family",
                "Exploit description",
                "id",
                "Exploit HREF",
                "type",
                "CVSS Score",
                "CVSS Vector",
                "Vulners HREF",
            ])
            for cve, exploits in results_to_write.items():
                for exploit in exploits:
                    _writer.writerow([
                        cve,
                        ", ".join(vulnerabilities_mapping.get(cve)),
                        exploit.get("title"),
                        exploit.get("bulletinFamily"),
                        exploit.get("description"),
                        exploit.get("id"),
                        exploit.get("href"),
                        exploit.get("type"),
                        exploit.get("cvss", {}).get("score"),
                        exploit.get("cvss", {}).get("vector"),
                        exploit.get("vhref"),
                    ])
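The first loop only builds a CVE -> affected-products map from hosts_results before any row is written; a toy illustration with hypothetical host data:

# Hypothetical hosts_results mirroring the fields read above.
hosts_results = {
    "10.0.0.1": {"product": "OpenSSL",
                 "vulnerabilities": {"vulners": {"CVE-2014-0160": {}}}},
    "10.0.0.2": {"product": "nginx",
                 "vulnerabilities": {"vulners": {"CVE-2014-0160": {}}}},
}

vulnerabilities_mapping = {}
for host, info in hosts_results.items():
    for db_name, vulns in (info.get("vulnerabilities") or {}).items():
        for cve in (vulns or {}):
            products = vulnerabilities_mapping.setdefault(cve, [])
            if info.get("product") not in products:
                products.append(info.get("product"))

print(vulnerabilities_mapping)  # {'CVE-2014-0160': ['OpenSSL', 'nginx']}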
Beispiel #45
0
def __init__(self, filepath, header):
    self.filepath = filepath
    self.header = header
    with open(filepath, 'w') as file:
        writer = csv_writer(file)
        writer.writerow(header)
Beispiel #46
0
def amass_csv(amass_in, csv_out, by_ip=False):
    """ Writes Amass results in Excel-compatible CSV format and returns the row count
    Param amass_in (str): the Amass JSON data to convert to CSV
    Param csv_out (str): the path to the CSV output file
    Param by_ip (bool): if True, write one row per IP address instead of joining them
    """
    amass = []
    with open(amass_in, 'r') as fh:
        for line in fh:
            amass.append(json_loads(line))

    with open(csv_out, 'w', newline='') as fh:
        amasswriter = csv_writer(fh, dialect='excel')
        amasswriter.writerow(['name', 'domain', 'ip', 'cidr',
                            'asn', 'desc', 'tag', 'source'])

        write_count = 0
        for row in amass:
            name = row['name']
            domain = row['domain']
            addresses = row['addresses']
            ip = []
            cidr = []
            asn = []
            desc = []
            tag = ''
            source = []

            for address in addresses:
                ip.append(address['ip'])
                cidr.append(address['cidr'])
                asn.append(str(address['asn']))
                desc.append(address['desc'])

            tag = row['tag']

            # the old format did not use a [list] for source
            if 'sources' in row:
                source = row['sources']
            elif 'source' in row:
                source.append(row['source'])

            if by_ip:
                for i, d in enumerate(ip):
                    amasswriter.writerow([name,
                        domain,
                        d,
                        cidr[i],
                        asn[i],
                        desc[i],
                        tag,
                        source[0]])
                    write_count += 1
            else:
                amasswriter.writerow([name,
                    domain,
                    '\r\n'.join(ip),
                    '\r\n'.join(cidr),
                    '\r\n'.join(asn),
                    '\r\n'.join(desc),
                    tag,
                    '\r\n'.join(source)])
                write_count += 1
    return write_count
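The input is newline-delimited Amass JSON; a hypothetical record holding only the fields this function reads (name, domain, addresses, tag, sources) looks roughly like this:

from json import loads as json_loads

# Made-up record using documentation IP/ASN values; field names match what amass_csv reads.
line = ('{"name": "www.example.com", "domain": "example.com", '
        '"addresses": [{"ip": "198.51.100.7", "cidr": "198.51.100.0/24", '
        '"asn": 64496, "desc": "EXAMPLE-AS"}], '
        '"tag": "dns", "sources": ["DNS"]}')
record = json_loads(line)
print(record["name"], [addr["ip"] for addr in record["addresses"]])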
Beispiel #47
0
def predict_folder(img_dir,
                   model_dir,
                   progress_hook=None,
                   move=True,
                   csv=False):
    """
	Run your model on a directory of images. This will also go through any images in existing subdirectories.
	Move each image into a subdirectory structure based on the prediction -- the predicted label
	becomes the directory name where the image goes.

	:param img_dir: the filepath to your directory of images.
	:param model_dir: path to the Lobe Tensorflow SavedModel export.
	:param progress_hook: an optional function that will be run with progress_hook(currentProgress, totalProgress) when progress updates.
	:param move: a flag for whether you want to physically move the image files into a subfolder structure based on the predicted label
	:param csv: a flag for whether you want to create an output csv showing the image filenames and their predictions
	"""
    print(f"Predicting {img_dir}")
    img_dir = os.path.abspath(img_dir)
    # make sure we were given a directory of images
    if not os.path.isdir(img_dir):
        raise ValueError(
            f"Please specify a directory to images. Found {img_dir}")

    num_items = sum(len(files) for _, _, files in os.walk(img_dir))
    print(f"Predicting {num_items} items...")

    # load the model
    print("Loading model...")
    model = ImageModel.load(model_path=model_dir)
    print("Model loaded!")

    # create our output csv
    out_csv = os.path.join(img_dir, "predictions.csv")
    if csv:
        with open(out_csv, 'w', encoding="utf-8", newline='') as f:
            writer = csv_writer(f)
            writer.writerow(['File', 'Label', 'Confidence'])

    # iterate over the rows and predict the label
    curr_progress = 0
    no_labels = 0
    with tqdm(total=num_items) as pbar:
        with ThreadPoolExecutor() as executor:
            model_futures = []
            # make our prediction jobs
            for root, _, files in os.walk(img_dir):
                for filename in files:
                    image_file = os.path.abspath(os.path.join(root, filename))
                    model_futures.append(
                        (executor.submit(predict_label_from_image_file,
                                         image_file=image_file,
                                         model=model), image_file))

            for future, img_file in model_futures:
                label, confidence = future.result()
                if label is None:
                    no_labels += 1
                else:
                    # move the file
                    dest_file = img_file
                    if move:
                        filename = os.path.split(img_file)[-1]
                        name, ext = os.path.splitext(filename)
                        dest_dir = os.path.join(img_dir, label)
                        os.makedirs(dest_dir, exist_ok=True)
                        dest_file = os.path.abspath(
                            os.path.join(dest_dir, filename))
                        # only move if the destination is different than the file
                        if dest_file != img_file:
                            try:
                                # rename the file if there is a conflict
                                rename_idx = 0
                                while os.path.exists(dest_file):
                                    new_name = f'{name}_{rename_idx}{ext}'
                                    dest_file = os.path.abspath(
                                        os.path.join(dest_dir, new_name))
                                    rename_idx += 1
                                shutil.move(img_file, dest_file)
                            except Exception as e:
                                print(f"Problem moving file: {e}")
                    # write the results to a csv
                    if csv:
                        with open(out_csv, 'a', encoding="utf-8",
                                  newline='') as f:
                            writer = csv_writer(f)
                            writer.writerow([dest_file, label, confidence])
                pbar.update(1)
                if progress_hook:
                    curr_progress += 1
                    progress_hook(curr_progress, num_items)
    print(f"Done! Number of images without predicted labels: {no_labels}")
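The move step avoids clobbering existing files by appending _0, _1, ... until a free name is found; the same logic pulled out into a helper (unique_destination is not part of the original code, just a sketch):

import os

def unique_destination(dest_dir, filename):
    # Keep suffixing _0, _1, ... until the destination path does not exist yet.
    name, ext = os.path.splitext(filename)
    dest_file = os.path.abspath(os.path.join(dest_dir, filename))
    rename_idx = 0
    while os.path.exists(dest_file):
        dest_file = os.path.abspath(os.path.join(dest_dir, f'{name}_{rename_idx}{ext}'))
        rename_idx += 1
    return dest_file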
Beispiel #48
0
def write_dataset(dataset, filename):
    with open(filename, mode='w') as file:
        dataset_writer = csv_writer(file, delimiter=',', quotechar='"', quoting=QUOTE_MINIMAL)
        for row in dataset:
            dataset_writer.writerow(row)
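A short usage sketch, assuming the csv_writer/QUOTE_MINIMAL imports used throughout these examples and hypothetical rows:

# Each inner list becomes one CSV row; values containing commas are quoted automatically.
rows = [["id", "label"], [1, "cat"], [2, "dog, large"]]
write_dataset(rows, "example_dataset.csv")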
Beispiel #49
0
def create_dataset(filepath,
                   url_col=None,
                   label_col=None,
                   progress_hook=None,
                   destination_directory=None):
    """
	Given a file with urls to images, downloads those images to a new directory that has the same name
	as the file without the extension. If labels are present, further categorizes the directory to have
	the labels as sub-directories.

	:param filepath: path to a valid txt or csv file with image urls to download.
	:param url_col: if this is a csv, the column header name for the urls to download.
	:param label_col: if this is a csv, the column header name for the labels of the images.
	:param progress_hook: an optional function that will be run with progress_hook(currentProgress, totalProgress) when progress updates.
	:param destination_directory: an optional directory path to download the dataset to.
	"""
    print(f"Processing {filepath}")
    filepath = os.path.abspath(filepath)
    filename, ext = _name_and_extension(filepath)
    # read the file
    # if this is a .txt file, don't treat the first row as a header. Otherwise, use the first row for header column names.
    if ext != '.xlsx':
        csv = pd.read_csv(filepath, header=None if ext == '.txt' else 0)
    else:
        csv = pd.read_excel(filepath, header=0)
    if ext in ['.csv', '.xlsx'] and not url_col:
        raise ValueError(f"Please specify an image url column for the csv.")
    url_col_idx = 0
    if url_col:
        try:
            url_col_idx = list(csv.columns).index(url_col)
        except ValueError:
            raise ValueError(
                f"Image url column {url_col} not found in csv headers {csv.columns}"
            )
    label_col_idx = None
    if label_col:
        try:
            label_col_idx = list(csv.columns).index(label_col)
        except ValueError:
            raise ValueError(
                f"Label column {label_col} not found in csv headers {csv.columns}"
            )

    total_jobs = len(csv)
    print(f"Downloading {total_jobs} items...")

    errors = []
    dest = os.path.join(destination_directory,
                        filename) if destination_directory else filename

    # try/catch for keyboard interrupt
    try:
        # iterate over the rows and add to our download processing job!
        with tqdm(total=total_jobs) as pbar:
            with ThreadPoolExecutor() as executor:
                # for every image in the row, download it!
                download_futures = {}
                lock = Lock()
                for i, row in enumerate(csv.itertuples(index=False)):
                    # job is passed to our worker processes
                    index = i + 1
                    url = row[url_col_idx]
                    label = None
                    if label_col_idx:
                        label = row[label_col_idx]
                        label = None if pd.isnull(label) else label
                    download_futures[executor.submit(download_image,
                                                     url=url,
                                                     directory=dest,
                                                     lock=lock,
                                                     label=label)] = (index,
                                                                      url,
                                                                      label)

                # iterate over the results to update our progress bar and write any errors to the error csv
                num_processed = 0
                for future in as_completed(download_futures):
                    index, url, label = download_futures[future]
                    filename = future.result()
                    if not filename:
                        error_row = [index, url]
                        if label_col_idx:
                            error_row.append(label)
                        errors.append(error_row)
                    # update progress
                    pbar.update(1)
                    num_processed += 1
                    if progress_hook:
                        progress_hook(num_processed, total_jobs)

        print('Cleaning up...')
        # write out the error csv
        if len(errors) > 0:
            errors.sort()
            fname, ext = os.path.splitext(filepath)
            error_file = f"{fname}_errors.csv"
            with open(error_file, 'w', newline='') as f:
                header = f"index,url{',label' if label_col_idx else ''}\n"
                f.write(header)
                writer = csv_writer(f)
                writer.writerows(errors)

    except Exception:
        raise
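A hypothetical input file and call for create_dataset; the column names image_url and category are examples, not requirements:

import pandas as pd

# Build a tiny url csv, then download into <destination_directory>/<filename>/<label>/.
pd.DataFrame({
    "image_url": ["https://example.com/img/001.jpg", "https://example.com/img/002.jpg"],
    "category": ["cat", "dog"],
}).to_csv("images.csv", index=False)

create_dataset("images.csv", url_col="image_url", label_col="category",
               destination_directory="downloads")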
Beispiel #50
0
def main(argv):
    ap = ArgumentParser(prog="sem-mse")
    ap.add_argument("resume_a")
    ap.add_argument("resume_b")
    args = ap.parse_args(argv)

    output_distributions = [
        i for i in pickler.load(os.path.join(args.resume_a, OUTPUT))
    ]
    assert len(output_distributions) == 1
    output_distribution = output_distributions[0]
    distributions_basenames = [
        os.path.basename(p)
        for p in glob.glob(os.path.join(args.resume_a, DISTRIBUTIONS_GLOB))
    ]
    size = None
    uniform_distribution = None
    count = 0
    comparison_total = 0.0
    uniform_total_a = 0.0
    uniform_total_b = 0.0
    distribution_total_a = 0.0
    distribution_total_b = 0.0

    for distributions_basename in sorted(distributions_basenames,
                                         key=file_sort_key):
        stream_a = pickler.load(
            os.path.join(args.resume_a, distributions_basename))
        stream_b = pickler.load(
            os.path.join(args.resume_b, distributions_basename))

        for distribution_a, distribution_b in zip(stream_a, stream_b):
            assert len(distribution_a) == len(distribution_b)

            if size is None:
                size = len(distribution_a)
                value = 1.0 / size
                uniform_distribution = {key: value for key in distribution_a.keys()}

            comparison_total += sum_squared_error(distribution_a,
                                                  distribution_b)
            uniform_total_a += sum_squared_error(distribution_a,
                                                 uniform_distribution)
            uniform_total_b += sum_squared_error(distribution_b,
                                                 uniform_distribution)
            distribution_total_a += sum_squared_error(distribution_a,
                                                      output_distribution)
            distribution_total_b += sum_squared_error(distribution_b,
                                                      output_distribution)
            count += 1

        try:
            next(stream_a)
            raise ValueError("stream a wasn't exhausted!")
        except StopIteration as e:
            pass

        try:
            next(stream_b)
            raise ValueError("stream b wasn't exhausted!")
        except StopIteration as e:
            pass

    with open("output-sem-mse-analysis.csv", "w") as fh:
        writer = csv_writer(fh)
        writer.writerow([
            "comparison", "sum of squared error", "mean squared error",
            "mse normalized"
        ])
        writer.writerow(row_data("comparison", comparison_total, count, size))
        writer.writerow(row_data("uniform a", uniform_total_a, count, size))
        writer.writerow(row_data("uniform b", uniform_total_b, count, size))
        writer.writerow(
            row_data("distribution a", distribution_total_a, count, size))
        writer.writerow(
            row_data("distribution b", distribution_total_b, count, size))

    return 0
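sum_squared_error is not shown in this example; given that it is applied to pairs of distributions keyed the same way, a plausible helper (an assumption, not the original implementation) is:

def sum_squared_error(dist_a, dist_b):
    # Assumes both arguments are dicts over the same keys.
    return sum((dist_a[key] - dist_b[key]) ** 2 for key in dist_a)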
Beispiel #51
0
def _create_csv_writer(cls, buffer):
    return csv_writer(buffer, delimiter=cls.field_delimiter, lineterminator=cls.row_delimiter)
Beispiel #52
0
def save_games(games: List[CSVGame], csv_filename: str = GAMES_CSV) -> None:
    with open(csv_filename, 'w', newline='') as csv_file:
        writer = csv_writer(csv_file)
        writer.writerow(CSVGame._fields)  # Write headers.
        writer.writerows(games)
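save_games reads the header from CSVGame._fields, which implies CSVGame is a namedtuple/NamedTuple; a hypothetical definition and call (the field names are invented for this sketch):

from typing import List, NamedTuple

class CSVGame(NamedTuple):
    title: str
    platform: str
    year: int

games: List[CSVGame] = [CSVGame("Portal", "PC", 2007), CSVGame("Ico", "PS2", 2001)]
save_games(games, "games.csv")   # header row: title,platform,year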
Beispiel #53
0
def download_hiwat(request):
    """
        Get hiwat data
    """

    print(request)
    get_data = request.GET

    try:
        comid = get_data['comid']
        startdate = get_data['startdate']
        country = 'Nepal'
        model = 'Hiwat'

        # path = os.path.join(app.get_custom_setting('forescast_data'))
        # Added for the THREDDS server
        path = os.path.join(app.get_app_workspace().path, 'forecast')

        filename = [f for f in os.listdir(path) if 'Qout_hiwat' in f]
        filename.reverse()
        selectedDate = int(startdate)
        filename = filename[selectedDate]

        #filename = filename[0]

        file = path + '/' + filename

        res = nc.Dataset(file, 'r')

        dates_raw = res.variables['time'][:]
        dates = []
        for d in dates_raw:
            dates.append(dt.datetime.fromtimestamp(d))

        comid_list = res.variables['rivid'][:]
        comid_index = int(np.where(comid_list == int(comid))[0])

        values = []
        for l in list(res.variables['Qout'][:]):
            values.append(float(l[comid_index]))

        pairs = [list(a) for a in zip(dates, values)]

        response = HttpResponse(content_type='text/csv')

        response[
            'Content-Disposition'] = 'attachment; filename={0}-{1}-{2}.csv'.format(
                country, model, comid)

        writer = csv_writer(response)

        writer.writerow(['datetime', 'streamflow (m3/s)'])

        for row_data in pairs:
            writer.writerow(row_data)

        return response

    except Exception as e:
        print(e)
        return JsonResponse(
            {'error': 'No HIWAT data found for the selected reach.'})
Beispiel #54
0
def predict_dataset(filepath, model_dir, url_col=None, progress_hook=None):
    """
	Given a file with urls to images, run the given SavedModel on each image and write the label
	and confidence back to the file.

	:param filepath: path to a valid txt or csv file with image urls to download.
	:param model_dir: path to the Lobe Tensorflow SavedModel export.
	:param url_col: if this is a csv, the column header name for the urls to download.
	:param progress_hook: an optional function that will be run with progress_hook(currentProgress, totalProgress) when progress updates.
	"""
    print(f"Predicting {filepath}")
    filepath = os.path.abspath(filepath)
    filename, ext = _name_and_extension(filepath)
    # read the file
    # if this is a .txt file, don't treat the first row as a header. Otherwise, use the first row for header column names.
    if ext != '.xlsx':
        csv = pd.read_csv(filepath, header=None if ext == '.txt' else 0)
    else:
        csv = pd.read_excel(filepath, header=0)
    if ext in ['.csv', '.xlsx'] and not url_col:
        raise ValueError(f"Please specify an image url column for the csv.")
    url_col_idx = 0
    if url_col:
        try:
            url_col_idx = list(csv.columns).index(url_col)
        except ValueError:
            raise ValueError(
                f"Image url column {url_col} not found in csv headers {csv.columns}"
            )

    num_items = len(csv)
    print(f"Predicting {num_items} items...")

    # load the model
    print("Loading model...")
    model = ImageModel.load(model_path=model_dir)
    print("Model loaded!")

    # create our output csv
    fname, ext = os.path.splitext(filepath)
    out_file = f"{fname}_predictions.csv"
    with open(out_file, 'w', encoding="utf-8", newline='') as f:
        # our header names from the pandas columns
        writer = csv_writer(f)
        writer.writerow([
            *[str(col) if not pd.isna(col) else '' for col in csv.columns],
            'label', 'confidence'
        ])

    # iterate over the rows and predict the label
    with tqdm(total=len(csv)) as pbar:
        with ThreadPoolExecutor() as executor:
            model_futures = []
            # make our prediction jobs
            for i, row in enumerate(csv.itertuples(index=False)):
                url = row[url_col_idx]
                model_futures.append(
                    executor.submit(predict_image_url,
                                    url=url,
                                    model=model,
                                    row=row))

            # write the results from the predict (this should go in order of the futures)
            for i, future in enumerate(model_futures):
                label, confidence, row = future.result()
                with open(out_file, 'a', encoding="utf-8", newline='') as f:
                    writer = csv_writer(f)
                    writer.writerow([
                        *[str(col) if not pd.isna(col) else '' for col in row],
                        label, confidence
                    ])
                pbar.update(1)
                if progress_hook:
                    progress_hook(i + 1, len(csv))
Beispiel #55
0

if gene_to_id_file_location:
    with open(gene_to_id_file_location, 'r') as source:
        reader = csv_reader(source, delimiter='\t')
        print reader.next()
        for line in reader:
            genes_to_ids_dict[line[2]] = line[0]


with open(data_source_location, 'r') as source:
    reader = csv_reader(source)
    for i, line in enumerate(reader):
        word = line[0]
        if gene_to_id_file_location:
            word = genes_to_ids_dict.get(word, 'None found')
        if word in high_conf_translation_dict.keys():
            high_conf_trans.append(high_conf_translation_dict[word])
        if word in low_conf_translation_dict.keys():
            low_conf_trans.append(low_conf_translation_dict[word])

print "out of %s, %s were translated with high confidence, %s with low and %s were not found" % \
      (i, len(high_conf_trans), len(low_conf_trans), i-len(high_conf_trans)-len(low_conf_trans))

with open(data_dump_location, 'w') as destination:
    writer = csv_writer(destination)
    writer.writerows((word for word in high_conf_trans))



Beispiel #56
0
def main(path_pairs, path_definitions, path_dense_fevents, path_info,
         output_path, timings_path):
    # Initialize the CSV output
    line_buffering = 1
    res_file = open(output_path, "x", buffering=line_buffering)
    res_writer = init_csv(res_file)

    # File that keep tracks of how much time was spent on each endpoint
    timings_file = open(timings_path, "x", buffering=line_buffering)
    timings_writer = csv_writer(timings_file)
    timings_writer.writerow(
        ["prior", "outcome", "lag", "step_size", "time_seconds"])

    # Load all data
    pairs, endpoints, df_events, df_info = load_data(path_pairs,
                                                     path_definitions,
                                                     path_dense_fevents,
                                                     path_info)

    # Initialize the job queue
    jobs = LifoQueue()
    for pair in pairs:
        for lag in LAGS:
            jobs.put({
                "pair": pair,
                "lag": lag,
                "step_size": DEFAULT_STEP_SIZE
            })

    # Keep track if the current endpoint pair needs to be skipped
    skip = None

    # Run the regression for each job
    while not jobs.empty():
        time_start = now()

        # Get job info
        job = jobs.get()
        pair = job["pair"]
        lag = job["lag"]
        step_size = job["step_size"]

        # Go to next endpoint pair if this one is to be skipped
        if pair == skip:
            continue

        logger.info(f"Jobs remaining: ~ {jobs.qsize()}")
        logger.info(
            f"[JOB] pair: {pair} | lag: {lag} | step size: {step_size}")
        prior, outcome = pair
        is_sex_specific = pd.notna(endpoints.loc[endpoints.NAME == outcome,
                                                 "SEX"].iloc[0])

        time_start = now()
        try:
            (df_unexp, df_unexp_death, df_unexp_exp_p1, df_unexp_exp_p2,
             df_tri_p1, df_tri_p2) = prep_coxhr(pair, lag, df_events, df_info)

            nindivs, df_lifelines = prep_lifelines(df_unexp, df_unexp_death,
                                                   df_unexp_exp_p1,
                                                   df_unexp_exp_p2, df_tri_p1,
                                                   df_tri_p2)
            compute_coxhr(pair, df_lifelines, lag, step_size, is_sex_specific,
                          nindivs, res_writer)
        except NotEnoughIndividuals as exc:
            skip = pair  # skip remaining jobs (different lags) for this endpoint pair
            logger.warning(exc)
        except (ConvergenceError, Warning) as exc:
            # Retry with a lower step_size
            if step_size == DEFAULT_STEP_SIZE:
                step_size = LOWER_STEP_SIZE
                jobs.put({"pair": pair, "lag": lag, "step_size": step_size})
            # We already tried the lower step size, so we have to skip this job
            else:
                logger.warning(
                    f"Failed to run Cox.fit() for {pair}, lag: {lag}, step size: {step_size}:\n{exc}"
                )
        finally:
            job_time = now() - time_start
            timings_writer.writerow([prior, outcome, lag, step_size, job_time])

    timings_file.close()
    res_file.close()
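The ConvergenceError branch re-queues a failed pair/lag exactly once with the lower step size and gives up after a second failure; the same pattern on its own, stripped of the Cox-specific parts (ConvergenceError and run_job are stand-ins for this sketch):

from queue import LifoQueue

class ConvergenceError(Exception):
    """Stand-in for the ConvergenceError caught above, only for this sketch."""

DEFAULT_STEP_SIZE = 1.0
LOWER_STEP_SIZE = 0.1

def run_job(job):
    # Hypothetical worker: pretend the default step size never converges.
    if job["step_size"] == DEFAULT_STEP_SIZE:
        raise ConvergenceError("did not converge")

jobs = LifoQueue()
jobs.put({"pair": ("A", "B"), "lag": 5, "step_size": DEFAULT_STEP_SIZE})

while not jobs.empty():
    job = jobs.get()
    try:
        run_job(job)
    except ConvergenceError:
        if job["step_size"] == DEFAULT_STEP_SIZE:
            job["step_size"] = LOWER_STEP_SIZE   # retry once with a smaller step size
            jobs.put(job)
        # a second failure is simply dropped (the original logs a warning instead)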