def _main(osm, auth, sections):
    """Dump a de-duplicated CSV of section contacts to stdout.

    One contact row is collected per member of each requested section;
    rows with a blank email (column 2) are dropped and a single row is
    kept per unique email address.
    """
    group = Group(osm, auth, MAPPING.keys(), None)

    # Reject unknown section names up front.
    for section in sections:
        assert section in group.SECTIONIDS.keys(), \
            "section must be in {!r}.".format(group.SECTIONIDS.keys())

    # Gather and flatten the contact rows for every member.
    contacts = []
    for section in sections:
        for member in group.section_all_members(section):
            contacts.extend(member2contacts(member, section))

    # Drop rows whose email column is blank.
    contacts = [contact for contact in contacts
                if contact[2].strip() != ""]

    # De-duplicate: the last row seen for each email address wins.
    by_email = {}
    for contact in contacts:
        by_email[contact[2]] = contact

    w = csv_writer(sys.stdout)
    w.writerows(list(by_email.values()))
def member_badges(osm, auth, firstname, lastname, csv=False, no_headers=False,
                  term=None):
    """Report every awarded badge for members matching a name.

    Looks the member(s) up by name, then scans the beaver/cub/scout badge
    records and lists each badge whose 'awarded' flag is set, together
    with the ISO date it was awarded.

    Args:
        osm, auth: OSM connection and credentials.
        firstname, lastname: name to search for.
        csv: write CSV to stdout instead of a tabulate table.
        no_headers: omit the header row.
        term: OSM term to query (None = default).
    """
    group = Group(osm, auth, MAPPING.keys(), term)
    members = group.find_by_name(firstname, lastname)

    rows = []
    for member in members:
        for section_type in ('beavers', 'cubs', 'scouts'):
            try:
                badges = member.get_badges(section_type=section_type)
                if badges is not None:
                    for badge in [_ for _ in badges if _['awarded'] == '1']:
                        rows.append([member['date_of_birth'],
                                     member['last_name'],
                                     member['age'],
                                     section_type,
                                     member._section['sectionname'],
                                     badge['badge'],
                                     datetime.date.fromtimestamp(
                                         int(badge['awarded_date'])).isoformat()])
            except Exception:
                # A member may not have records in every section type;
                # report the failure and continue with the next section
                # instead of aborting the whole listing.  (Narrowed from a
                # bare except so KeyboardInterrupt etc. still propagate.)
                import traceback
                traceback.print_exc()

    # BUG FIX: rows carry seven columns but the header row listed only
    # six -- the awarded-date column had no label.
    headers = ["DOB", "Last Name", "Age", "Section Type", "Section Name",
               "Badge", "Awarded"]

    if csv:
        w = csv_writer(sys.stdout)
        if not no_headers:
            w.writerow(list(headers))
        w.writerows(rows)
    else:
        if not no_headers:
            print(tabulate.tabulate(rows, headers=headers))
        else:
            print(tabulate.tabulate(rows, tablefmt="plain"))
def main():
    """Pad daily CSV time series given on the command line.

    For each input file ``X.csv`` writes ``X_pad.csv`` where any gap of
    more than one day between consecutive rows is filled with zero-valued
    rows, one per missing day.  Expects rows of (date, value) with a
    header line.
    """
    for infilename in sys.argv[1:]:
        # BUG FIX: the pattern must be a raw string -- "\." is an invalid
        # string escape and only worked by accident.
        outfilename = sub(r"\.csv", "_pad.csv", infilename)
        prev_dt = -1  # sentinel: no previous row seen yet
        one = timedelta(days=1)
        with open(outfilename, "wb") as outfile:
            w = csv_writer(outfile)
            with open(infilename, "rb") as infile:
                r = csv_reader(infile)
                header = r.next()
                w.writerow(header)
                for row in r:
                    dt = dt_parser.parse(row[0])
                    if prev_dt != -1:
                        # We're past the first line -- compare with the
                        # previous date and pad any gap with zero rows.
                        diff = dt - prev_dt
                        if diff > one:
                            # Emit the missing days in ascending order.
                            for i in reversed(range(diff.days - 1)):
                                pad = dt - timedelta(days=(i + 1))
                                w.writerow([_get_dt_str(pad), 0])
                    w.writerow([_get_dt_str(dt), row[1]])
                    prev_dt = dt
def CreateConstMuskingumXFile(x_value, in_connectivity_file, out_x_file):
    """Write a muskingum X csv holding one constant value per river segment.

    Args:
        x_value(float): Value for the muskingum X parameter [0-0.5].
        in_connectivity_file(str): The path to the RAPID connectivity file
            (used only to count the river segments).
        out_x_file(str): The path to the output x file.
    """
    # The connectivity file has one row per river: count them.
    with open_csv(in_connectivity_file, "r") as csvfile:
        num_rivers = sum(1 for _row in csv_reader(csvfile))

    # Emit the same constant once per river.
    with open_csv(out_x_file, 'w') as kfile:
        x_writer = csv_writer(kfile)
        for _ in xrange(num_rivers):
            x_writer.writerow([x_value])
def StreamIDNextDownIDToConnectivity(stream_id_array, next_down_id_array, out_csv_file):
    """
    Creates RAPID connect file from stream_id array and next down id array
    """
    rows = []
    max_upstream = 0
    for hydroid in np.sort(stream_id_array):
        # Upstream segments are those whose next-down id is this hydroid.
        upstream_ids = stream_id_array[next_down_id_array == hydroid]
        n_upstream = len(upstream_ids)
        if n_upstream > max_upstream:
            max_upstream = n_upstream
        next_down = next_down_id_array[stream_id_array == hydroid][0]
        # NOTE: -1 "no downstream" markers are written through unchanged
        # because legitimate stream IDs of zero exist.
        # Row layout: hydroid, next-down id, upstream count, upstream ids.
        rows.append(np.concatenate([np.array([hydroid, next_down, n_upstream]),
                                    upstream_ids]).astype(int))

    with open_csv(out_csv_file, 'w') as csvfile:
        connectwriter = csv_writer(csvfile)
        for row in rows:
            # Zero-pad every row out to the widest upstream count.
            padding = np.array([0 for i in xrange(max_upstream - row[2])])
            connectwriter.writerow(np.concatenate([row, padding]).astype(int))
def CreateMuskingumKFile(lambda_k, in_kfac_file, out_k_file):
    """Scale every kfac value by *lambda_k* into a muskingum k csv.

    Args:
        lambda_k(float): The value for lambda given from RAPID after the
            calibration process. If no calibration has been performed,
            0.35 is reasonable.
        in_kfac_file(str): The path to the input kfac file.
        out_k_file(str): The path to the output k file.
    """
    kfac_table = csv_to_list(in_kfac_file)
    with open_csv(out_k_file, 'w') as kfile:
        writer = csv_writer(kfile)
        for kfac_row in kfac_table:
            # k = lambda * kfac, one value per row.
            writer.writerow([lambda_k * float(kfac_row[0])])
def to_csv(self):
    """Serialise the visible rows as CSV bytes: (style rule selector, match count)."""
    buf = BytesIO()
    writer = csv_writer(buf)
    writer.writerow([_('Style Rule'), _('Number of matches')])
    for row in xrange(self.proxy.rowCount()):
        source_index = self.proxy.mapToSource(self.proxy.index(row, 0))
        entry = source_index.data(Qt.UserRole)
        writer.writerow([entry.rule.selector, entry.count])
    return buf.getvalue()
def to_csv(self):
    """Serialise the visible rows as CSV bytes: (class name, match count)."""
    buf = BytesIO()
    writer = csv_writer(buf)
    writer.writerow([_('Class'), _('Number of matches')])
    for row in xrange(self.proxy.rowCount()):
        source_index = self.proxy.mapToSource(self.proxy.index(row, 0))
        entry = source_index.data(Qt.UserRole)
        writer.writerow([entry.cls, entry.num_of_matches])
    return buf.getvalue()
def export_csv(self):
    """Render all entries as CSV text: a "When" column plus one column per attribute."""
    out = StringIO()
    writer = csv_writer(out)
    attribute_names = self.attributes()
    writer.writerow(["When"] + attribute_names)
    for entry in self.entries():
        # Missing attributes render as empty cells.
        row = [entry.when]
        for name in attribute_names:
            row.append(entry.attrs.get(name, ""))
        writer.writerow(row)
    return out.getvalue()
def to_csv(self):
    """Serialise every row/column of the proxy model as CSV bytes, headers first."""
    buf = BytesIO()
    writer = csv_writer(buf)
    writer.writerow(self.proxy.sourceModel().COLUMN_HEADERS)
    column_count = self.proxy.columnCount()
    for row in xrange(self.proxy.rowCount()):
        writer.writerow([self.proxy.index(row, col).data(Qt.DisplayRole)
                         for col in xrange(column_count)])
    return buf.getvalue()
def write_csv(csv_file, rows, encoding=r'UTF-8'):
    """Atomically (re)write *csv_file* with *rows*.

    The rows are written to a sibling work file first, which is then
    moved over the destination, so readers never observe a partially
    written file.

    Args:
        csv_file: destination path.
        rows: iterable of row sequences passed to ``csv.writer.writerow``.
        encoding: text encoding of the output file.
    """
    from csv import writer as csv_writer
    from os import replace

    work = csv_file + '-'
    # BUG FIX: newline='' is required by the csv module -- without it the
    # writer's "\r\n" terminators are doubled to "\r\r\n" on Windows.
    with open(work, r'wt', encoding=encoding, newline='') as ostream:
        w = csv_writer(ostream)
        for row in rows:
            w.writerow(row)
    # BUG FIX: os.replace (unlike os.rename) also succeeds on Windows when
    # the destination already exists, keeping the swap atomic everywhere.
    replace(work, csv_file)
def compress_csv(in_path, n, out_path):
    """Compresses n csv rows into one.

    Reads *in_path*, widens the header n-fold and groups every n data
    rows into a single output row written to *out_path*.
    """
    header, rows = csv_elems(in_path)
    new_header = duplicate_header(header, n)
    new_rows = group_rows(rows, n)
    # BUG FIX: the output handle was opened inline and never closed, so
    # buffered data could be lost if the interpreter exited early.
    with open(out_path, 'w') as out_file:
        writer = csv_writer(out_file, delimiter=',', quotechar='\"')
        writer.writerow(new_header)
        for row in new_rows:
            writer.writerow(row)
def saveCSV(self): print "[Birthday Reminder] exporting CSV file", CSVFILE try: csvFile = open(CSVFILE, "wb") writer = csv_writer(csvFile) writer.writerows(self.birthdaytimer.getBirthdayList()) csvFile.close() self.session.open(MessageBox, _("Wrote CSV file %s.") % CSVFILE, MessageBox.TYPE_INFO) except: self.session.open(MessageBox, _("Can't write CSV file %s.") % CSVFILE, MessageBox.TYPE_ERROR)
def serialize_csv_trace_obsels(graph, resource, bindings=None):
    # Generator: stream the obsels of resource.trace as CSV, yielding one
    # encoded line at a time (suitable for a chunked HTTP response body).
    # 'bindings' is accepted for serializer-interface compatibility and is
    # unused here.
    # NOTE(review): Python 2 only -- relies on StringIO.reset() and on
    # byte-oriented csv output (rows are utf-8 encoded before writing).
    sio = StringIO()
    csvw = csv_writer(sio)
    for row in iter_csv_rows(resource.trace.uri, graph):
        csvw.writerow([ i.encode('utf-8') for i in row ])
        # immediately yield each line
        yield sio.getvalue()
        # then empty sio before writing next line
        sio.reset()
        sio.truncate()
def FlowlineToPoint(in_drainage_line, river_id, out_csv_file, file_geodatabase=None):
    """
    Converts flowline feature to a list of centroid points with their comid
    in EPSG:4326.

    Args:
        in_drainage_line(str): Path to the stream network (i.e. Drainage Line) shapefile.
        river_id(str): The name of the field with the river ID (Ex. 'HydroID', 'COMID', or 'LINKNO').
        out_csv_file(str): Path to the output csv file with the centroid points.
        file_geodatabase(Optional[str]): Path to the file geodatabase. If you use this option,
            in_drainage_line is the name of the stream network feature class.
            (WARNING: Not always stable with GDAL.)

    Example::

        from RAPIDpy.gis.centroid import FlowlineToPoint

        if __name__ == "__main__":
            FlowlineToPoint(in_drainage_line='/path/to/drainageline.shp',
                            river_id='LINKNO',
                            out_csv_file='/path/to/comid_lat_lon_z.csv',
                            )
    """
    # Open either a feature class inside a file geodatabase or a plain
    # shapefile; both paths yield an OGR layer.
    if file_geodatabase:
        gdb_driver = ogr.GetDriverByName("OpenFileGDB")
        ogr_file_geodatabase = gdb_driver.Open(file_geodatabase, 0)
        ogr_drainage_line_shapefile_lyr = ogr_file_geodatabase.GetLayer(in_drainage_line)
    else:
        ogr_drainage_line_shapefile = ogr.Open(in_drainage_line)
        ogr_drainage_line_shapefile_lyr = ogr_drainage_line_shapefile.GetLayer()

    # Build a transform to EPSG:4326 only if the layer is not already
    # geographic.
    ogr_drainage_line_shapefile_lyr_proj = ogr_drainage_line_shapefile_lyr.GetSpatialRef()
    osr_geographic_proj = osr.SpatialReference()
    osr_geographic_proj.ImportFromEPSG(4326)
    proj_transform = None
    if ogr_drainage_line_shapefile_lyr_proj != osr_geographic_proj:
        proj_transform = osr.CoordinateTransformation(ogr_drainage_line_shapefile_lyr_proj,
                                                      osr_geographic_proj)

    #print valid field names to table
    with open_csv(out_csv_file, 'w') as outfile:
        writer = csv_writer(outfile)
        writer.writerow(['rivid','lat','lon','z'])
        for feature in ogr_drainage_line_shapefile_lyr:
            feat_geom = feature.GetGeometryRef()
            if proj_transform:
                feat_geom.Transform(proj_transform)
            centroid = feat_geom.Centroid()
            # GetPoint(0) returns (x, y, z); output order is lat, lon, z.
            centroid_pt = centroid.GetPoint(0)
            writer.writerow([feature.GetField(river_id),
                             centroid_pt[1],
                             centroid_pt[0],
                             centroid_pt[2]])
def main():
    """Scrape the Cochrane 'chocolate' search page and write its categories to categories.csv."""
    req = requests.get('http://summaries.cochrane.org/search/site/chocolate')
    soup = BeautifulSoup(req.text)
    categories = get_categories_for_search(soup)
    filename = 'categories.csv'
    with open(filename, 'w') as csv_file:
        # BUG FIX: the writer was re-created on every loop iteration;
        # one writer per file is enough.
        spamwriter = csv_writer(csv_file)
        for category in categories:
            spamwriter.writerow([category])
def parse_results(in_path, out_path, threshold, hedge_path):
    """Parse results csv file from CrowdFlower"""
    hedge_types = load_hedge_types(hedge_path)
    hits = load_units(in_path)
    with open(out_path, 'w') as csv_file:
        writer = csv_writer(csv_file, delimiter='\t', quotechar='\"')
        writer.writerow(['segment', 'proposition', 'belief_type'])
        for hedge, sent, judgement, confidence in hits:
            # Only keep judgements above the confidence threshold.
            if confidence <= threshold:
                continue
            # Confident "yes" keeps the hedge's type; anything else is
            # labelled non-hedge ('NH').
            label = hedge_types[hedge] if judgement == 'yes' else 'NH'
            writer.writerow([change_tag(sent), hedge, label])
def list_to_csv(ls, path, hedge_path):
    """Write aggregated hedge judgements to a tab-separated csv.

    Each item of *ls* is ((hedge, sentence), judgements) where judgements
    maps 'TRUE'/'FALSE' to vote counts; a majority of TRUE keeps the
    hedge's type, otherwise the row is labelled non-hedge ('NH').
    """
    hedge_types = load_hedge_types(hedge_path)
    # BUG FIX: the output handle was opened inline and never closed.
    with open(path, 'w') as out_file:
        writer = csv_writer(out_file, delimiter='\t', quotechar='\"')
        writer.writerow(['segment', 'proposition', 'belief_type'])
        for unit, judgements in ls:
            hedge, sent = unit
            yes, no = judgements['TRUE'], judgements['FALSE']
            if yes > no:
                writer.writerow([change_tag(sent), hedge, hedge_types[hedge]])
            else:
                writer.writerow([change_tag(sent), hedge, 'NH'])
def movers_list(osm, auth, sections, age=None, term=None, csv=False,
                no_headers=False):
    """Tabulate members due to move up a section, with key transition dates.

    Args:
        osm, auth: OSM connection and credentials.
        sections: section names to report on.
        age: if given, only include movers older than this many years.
        term: OSM term to query (None = default).
        csv: write CSV to stdout instead of a tabulate table.
        no_headers: omit the header row.
    """
    group = Group(osm, auth, MAPPING.keys(), term)
    rows = []
    for section in sections:
        section_ = group._sections.sections[Group.SECTIONIDS[section]]
        headers = ['firstname', 'lastname', 'real_age', 'dob',
                   "Date Parents Contacted", "Parents Preference",
                   "Date Leaders Contacted", "Agreed Section",
                   "Starting Date", "Leaving Date", "Notes", "Priority",
                   '8', '10 1/2', '14 1/2']
        movers = section_.movers
        if age:
            # Keep only movers older than the requested age (in days).
            threshold = (365 * float(age))
            now = datetime.datetime.now()
            age_fn = lambda dob: (now - datetime.datetime.strptime(dob, '%Y-%m-%d')).days
            movers = [mover for mover in section_.movers
                      if age_fn(mover['dob']) > threshold]
        now = datetime.datetime.now()
        for mover in movers:
            real_dob = datetime.datetime.strptime(mover['dob'], '%Y-%m-%d')
            rel_age = relativedelta.relativedelta(now, real_dob)
            # BUG FIX: both halves of the format used argument 0, so the
            # months part repeated the years value (e.g. "08.08").
            mover['real_age'] = "{0:02d}.{1:02d}".format(rel_age.years,
                                                         rel_age.months)
            # Key birthdays for section transitions.
            mover['8'] = (real_dob + relativedelta.relativedelta(
                years=8)).strftime("%b %y")
            mover['10 1/2'] = (real_dob + relativedelta.relativedelta(
                years=10, months=6)).strftime("%b %y")
            mover['14 1/2'] = (real_dob + relativedelta.relativedelta(
                years=14, months=6)).strftime("%b %y")
        rows += [[section_['sectionname']] +
                 [member[header] for header in headers]
                 for member in movers]

    headers = ["Current Section"] + headers
    if csv:
        w = csv_writer(sys.stdout)
        if not no_headers:
            w.writerow(list(headers))
        w.writerows(rows)
    else:
        if not no_headers:
            print(tabulate.tabulate(rows, headers=headers))
        else:
            print(tabulate.tabulate(rows, tablefmt="plain"))
def process(self, files):
    """Flatten each banner into self.fields columns and write Excel-dialect CSV to self.fout."""
    writer = csv_writer(self.fout, dialect=excel)
    # Header row first.
    writer.writerow(self.fields)
    for banner in iterate_files(files):
        try:
            # Build the whole row before writing so a failed field
            # extraction never emits a partial row.
            writer.writerow([self.banner_field(banner, field)
                             for field in self.fields])
        except Exception:
            # Best effort: skip banners whose fields cannot be extracted.
            pass
def sightings_to_csv(since=None, output='sightings.csv'):
    """Dump per-pokemon sighting counts (ids 1-251) to a CSV file."""
    from csv import writer as csv_writer
    if since:
        conf.REPORT_SINCE = since
    with session_scope() as session:
        sightings = get_sightings_per_pokemon(session)
    # Zero-fill every id from 1 to 251 first, then merge in the observed
    # counts so missing pokemon still get a row.
    counts = OrderedDict()
    for pokemon_id in range(1, 252):
        if pokemon_id not in sightings:
            counts[pokemon_id] = 0
    counts.update(sightings)
    with open(output, 'wt') as csvfile:
        writer = csv_writer(csvfile)
        writer.writerow(('pokemon_id', 'count'))
        writer.writerows(counts.items())
def process(self, files):
    """Write one Excel-dialect CSV row per banner (self.fields columns) to self.fout."""
    writer = csv_writer(self.fout, dialect=excel)
    # Write the header
    writer.writerow(self.fields)
    for banner in iterate_files(files):
        # The "vulns" property can't be nicely flattened as-is so we turn
        # it into a list before processing the banner.
        if 'vulns' in banner:
            # BUG FIX: on Python 3, dict.keys() is a view and would be
            # serialised as "dict_keys([...])" -- materialise a real list
            # (a no-op change on Python 2).
            banner['vulns'] = list(banner['vulns'].keys())
        try:
            row = []
            for field in self.fields:
                value = self.banner_field(banner, field)
                row.append(value)
            writer.writerow(row)
        except Exception:
            # Best effort: skip banners whose fields cannot be extracted.
            pass
def contacts_detail(osm, auth, sections, csv=False, term=None, no_headers=False):
    """List contact details (addresses, email, gender) for every member of the sections.

    Args:
        osm, auth: OSM connection and credentials.
        sections: section names to report on.
        csv: write CSV to stdout instead of a tabulate table.
        term: OSM term to query (None = default).
        no_headers: omit the header row.
    """
    group = Group(osm, auth, MAPPING.keys(), term)

    # Map each named section to its section type.
    section_map = {'Garrick': 'Beavers',
                   'Paget': 'Beavers',
                   'Swinfen': 'Beavers',
                   'Maclean': 'Cubs',
                   'Somers': 'Cubs',
                   'Rowallan': 'Cubs',
                   'Erasmus': 'Scouts',
                   'Boswell': 'Scouts',
                   'Johnson': 'Scouts'}

    rows = []

    def add_row(section, member):
        # One row of 12 columns per member; order must match `headers`.
        rows.append([section_map[section],
                     section,
                     member['first_name'],
                     member['last_name'],
                     member['date_of_birth'],
                     member['contact_primary_1.email1'],
                     member['contact_primary_1.address1'],
                     member['contact_primary_1.address2'],
                     member['contact_primary_1.address3'],
                     member['contact_primary_1.postcode'],
                     member['contact_primary_2.address1'],
                     member['floating.gender'].lower()])

    for section in sections:
        for member in group.section_all_members(section):
            add_row(section, member)

    # BUG FIX: there were 13 headers for 12 row columns (a stray
    # "Address3"), which shifted every label after "Address2".
    headers = ["Section", "Section Name", "First", "Last", "DOB", "Email1",
               "Address1", "Address1.1", "Address1.2", "Address1.3",
               "Address2", "Gender"]

    if csv:
        w = csv_writer(sys.stdout)
        if not no_headers:
            w.writerow(list(headers))
        w.writerows(rows)
    else:
        if not no_headers:
            print(tabulate.tabulate(rows, headers=headers))
        else:
            print(tabulate.tabulate(rows, tablefmt="plain"))
def events_attendees(osm, auth, sections, event, term=None, csv=False,
                     attending_only=False, no_headers=False):
    """List the attendees of *event* for each section.

    Args:
        osm, auth: OSM connection and credentials.
        sections: section names to report on.
        event: event name to look up in each section.
        term: OSM term to query (None = default).
        csv: write CSV to stdout instead of a tabulate table.
        attending_only: only include attendees marked "Yes".
        no_headers: omit the header row.
    """
    group = Group(osm, auth, MAPPING.keys(), term)
    for section in sections:
        section_ = group._sections.sections[Group.SECTIONIDS[section]]
        ev = section_.events.get_by_name(event)
        if not ev:
            log.error("No such event: {}".format(event))
            # BUG FIX: exit with a non-zero status on failure so callers
            # and shell scripts can detect the error.
            sys.exit(1)
        attendees = ev.attendees
        mapping = ev.fieldmap
        if attending_only:
            attendees = [attendee for attendee in attendees
                         if attendee['attending'] == "Yes"]
        # Member-level columns appended after the event's own fields.
        extra_fields = {
            'patrol': 'Six',
            'age': 'Age',
        }

        def fields(attendee):
            out = [str(attendee[_[1]]) for _ in mapping] + \
                  [section_.members.get_by_event_attendee(attendee)[_]
                   for _ in extra_fields.keys()]
            return out

        # Skip attendees that are no longer members of the section.
        output = [fields(attendee) for attendee in attendees
                  if section_.members.is_member(attendee['scoutid'])]
        headers = [_[0] for _ in mapping] + list(extra_fields.values())
        if csv:
            w = csv_writer(sys.stdout)
            if not no_headers:
                w.writerow(list(headers))
            w.writerows(output)
        else:
            if not no_headers:
                print(tabulate.tabulate(output, headers=headers))
            else:
                print(tabulate.tabulate(output, tablefmt="plain"))
def csv(request):
    # Django view: export every Entry as a semicolon-separated CSV
    # attachment ("quantify.csv") with a Date column plus one column per
    # Field; missing records render as empty cells.
    # NOTE(review): appears written for Python 2 -- on Python 3,
    # field.name.encode('utf-8') would put b'...' bytes in the header and
    # in the field__name lookup below; confirm the runtime before porting.
    # Create the HttpResponse object with the appropriate CSV header.
    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="quantify.csv"'
    writer = csv_writer(response,delimiter=';',quotechar='"',quoting=QUOTE_NONNUMERIC)
    cols = ['Date']
    for field in Field.objects.all():
        cols.append(field.name.encode('utf-8'))
    writer.writerow(cols)
    for entry in Entry.objects.all():
        row = [entry.date]
        # cols[1:] skips the 'Date' label; each remaining col is a field name.
        for col in cols[1:]:
            try:
                row.append(entry.records.get(field__name=col).value)
            except Record.DoesNotExist:
                # No record for this field on this entry: empty cell.
                row.append('')
        writer.writerow(row)
    return response
def members_badges(osm, auth, sections, csv=False, no_headers=False, term=None):
    """Summarise awarded badge counts per young person, by badge group.

    Counts awarded badges in each OSM badge group (challenge new/old,
    activity, staged, core) for every YP member of the sections.

    Args:
        osm, auth: OSM connection and credentials.
        sections: section names to report on.
        csv: write CSV to stdout instead of a tabulate table.
        no_headers: omit the header row.
        term: OSM term to query (None = default).
    """
    group = Group(osm, auth, MAPPING.keys(), term)
    # BUG FIX: rows was re-initialised inside the section loop, so only
    # the final section's members appeared in the output.
    rows = []
    for section in sections:
        members = group.section_yp_members_without_leaders(section)
        for member in members:
            badges = member.get_badges(section_type=group.SECTION_TYPE[section])
            if badges:  # If no badges - probably a leader
                awarded = [badge for badge in badges
                           if badge['awarded'] == '1']
                # badge_group 1 = challenge (split pre/post 2015),
                # 2 = activity, 3 = staged, 4 = core.
                challenge_new = len([b for b in awarded
                                     if b['badge_group'] == '1'
                                     and not b['badge'].endswith('(Pre 2015)')])
                challenge_old = len([b for b in awarded
                                     if b['badge_group'] == '1'
                                     and b['badge'].endswith('(Pre 2015)')])
                activity = len([b for b in awarded if b['badge_group'] == '2'])
                staged = len([b for b in awarded if b['badge_group'] == '3'])
                core = len([b for b in awarded if b['badge_group'] == '4'])
                rows.append([member['date_of_birth'], member['last_name'],
                             member['age'], section, challenge_new,
                             challenge_old, activity, staged, core])

    # BUG FIX: header order now matches the row order -- "Staged" and
    # "Activity" were swapped relative to the appended counts.
    headers = ["DOB", "Last Name", "Age", "Section Name", "Challenge",
               "Challenge_old", "Activity", "Staged", "Core"]

    if csv:
        w = csv_writer(sys.stdout)
        if not no_headers:
            w.writerow(list(headers))
        w.writerows(rows)
    else:
        if not no_headers:
            print(tabulate.tabulate(rows, headers=headers))
        else:
            print(tabulate.tabulate(rows, tablefmt="plain"))
def CreateMuskingumXFileFromDranageLine(in_drainage_line, x_id, out_x_file,
                                        file_geodatabase=None):
    """Extract the muskingum X value of every drainage-line feature into a csv.

    Args:
        in_drainage_line(str): Path to the stream network (i.e. Drainage
            Line) shapefile.
        x_id(str): The name of the muksingum X field (i.e. 'Musk_x').
        out_x_file(str): The path to the output x file.
        file_geodatabase(Optional[str]): Path to the file geodatabase. If
            you use this option, in_drainage_line is the name of the
            stream network feature class. (WARNING: Not always stable
            with GDAL.)
    """
    # Open either a feature class in a geodatabase or a plain shapefile;
    # both produce an OGR layer to iterate.
    if file_geodatabase:
        gdb_driver = ogr.GetDriverByName("OpenFileGDB")
        ogr_file_geodatabase = gdb_driver.Open(file_geodatabase)
        drainage_line_lyr = ogr_file_geodatabase.GetLayer(in_drainage_line)
    else:
        drainage_line_shapefile = ogr.Open(in_drainage_line)
        drainage_line_lyr = drainage_line_shapefile.GetLayer()

    with open_csv(out_x_file, 'w') as x_file:
        writer = csv_writer(x_file)
        for drainage_line_feature in drainage_line_lyr:
            writer.writerow([drainage_line_feature.GetField(x_id)])
def download_monthlyData(request):
    """ Get data for stations """
    get_data = request.GET
    try:
        codEstacion = get_data['stationcode']
        nomEstacion = get_data['stationname']
        nomCountry = get_data['countryname']

        # Monthly observation files live next to this module.
        dir_base = os.path.dirname(__file__)
        url = os.path.join(dir_base, 'public/Data',
                           codEstacion + '-MONTHLY.csv')

        # Load the series, index by date and drop negative (missing)
        # discharge values.
        df = pd.read_csv(url, index_col=0)
        df.index = pd.to_datetime(df.index)
        df = df[df.iloc[:, 0] >= 0]
        datesDischarge = df.index.tolist()
        dataDischarge = df.iloc[:, 0].values
        pairs = [list(a) for a in zip(datesDischarge, dataDischarge)]

        # Stream the (datetime, streamflow) pairs back as a CSV download.
        response = HttpResponse(content_type='text/csv')
        response[
            'Content-Disposition'] = 'attachment; filename=monthly_data_{0}-{1}-{2}.csv'.format(
            nomEstacion, codEstacion, nomCountry)
        writer = csv_writer(response)
        writer.writerow(['datetime', 'streamflow (m3/s)'])
        for row_data in pairs:
            writer.writerow(row_data)
        return response
    except Exception as e:
        print(str(e))
        return JsonResponse(
            {'error': 'No observed data found for the selected station.'})
def FlowlineToPoint(in_drainage_line, river_id, out_csv_file, file_geodatabase=None):
    """
    Converts flowline feature to a list of centroid points
    with their rivid in EPSG:4326.

    Parameters
    ----------
    in_drainage_line: str
        Path to the stream network (i.e. Drainage Line) shapefile.
    river_id: str
        The name of the field with the river ID
        (Ex. 'HydroID', 'COMID', or 'LINKNO').
    out_csv_file: str
        Path to the output csv file with the centroid points.
    file_geodatabase: str, optional
        Path to the file geodatabase. If you use this option,
        in_drainage_line is the name of the stream network feature class
        (WARNING: Not always stable with GDAL).


    Example::

        from RAPIDpy.gis.centroid import FlowlineToPoint

        FlowlineToPoint(
            in_drainage_line='/path/to/drainageline.shp',
            river_id='LINKNO',
            out_csv_file='/path/to/comid_lat_lon_z.csv')

    """
    ogr_drainage_line_shapefile_lyr, ogr_drainage_line_shapefile = \
        open_shapefile(in_drainage_line, file_geodatabase)

    # Reproject to EPSG:4326 only when the layer is not already geographic.
    ogr_drainage_line_shapefile_lyr_proj = \
        ogr_drainage_line_shapefile_lyr.GetSpatialRef()
    osr_geographic_proj = osr.SpatialReference()
    osr_geographic_proj.ImportFromEPSG(4326)
    proj_transform = None
    if ogr_drainage_line_shapefile_lyr_proj != osr_geographic_proj:
        proj_transform = osr.CoordinateTransformation(
            ogr_drainage_line_shapefile_lyr_proj,
            osr_geographic_proj)

    # print valid field names to table
    with open_csv(out_csv_file, 'w') as outfile:
        writer = csv_writer(outfile)
        writer.writerow(['rivid', 'lat', 'lon', 'z'])
        for feature in ogr_drainage_line_shapefile_lyr:
            feat_geom = feature.GetGeometryRef()
            if proj_transform:
                feat_geom.Transform(proj_transform)
            centroid = feat_geom.Centroid()
            # GetPoint(0) yields (x, y, z); written out as lat, lon, z.
            centroid_pt = centroid.GetPoint(0)
            writer.writerow([
                feature.GetField(river_id),
                centroid_pt[1],
                centroid_pt[0],
                centroid_pt[2]
            ])

    # Release the OGR datasource handle explicitly.
    del ogr_drainage_line_shapefile
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

'''
Reads CNPJs and CPFs from standard input and writes a CSV to standard
output with the result of the 2008 campaign accounting query.
'''

import sys
from csv import writer as csv_writer
from tse.prestacao_de_contas import doador_2008

if __name__ == '__main__':
    csv = csv_writer(sys.stdout)
    # Header row: the field names of a 2008 donor record.
    csv.writerow(doador_2008.campos)
    for line in sys.stdin:
        cnpj_ou_cpf = line.strip()
        resultado = doador_2008(cnpj_ou_cpf)
        # Entries with no accounting result produce no output row.
        if resultado:
            csv.writerow(resultado)

# vim:tabstop=4:expandtab:smartindent:encoding=utf8
def main(argv):
    # CLI driver: train (or load) a semantic encoding model on top of a
    # saved sequential LSTM, evaluate its perplexity over the stored
    # states, and write per-key and total results as csv files into the
    # encoding directory.  Returns 0 on success.
    ap = ArgumentParser(prog="generate-semantic-model")
    ap.add_argument("-v", "--verbose", default=False, action="store_true",
                    help="Turn on verbose logging.")
    ap.add_argument("-i", "--initial-decays", default=5, type=int)
    ap.add_argument("-c", "--convergence-decays", default=2, type=int)
    ap.add_argument("-a", "--arc-epochs", default=3, type=int)
    ap.add_argument("-l", "--layers", default=2, type=int)
    ap.add_argument("-w", "--width", default=100, type=int)
    ap.add_argument("--word-input", default=False, action="store_true")
    ap.add_argument("-p", "--pre-existing", default=False, action="store_true")
    ap.add_argument("-m", "--monolith", default=False, action="store_true")
    ap.add_argument("--key-set", nargs="*", default=None)
    ap.add_argument("data_dir")
    ap.add_argument("sequential_dir")
    ap.add_argument("states_dir")
    ap.add_argument("encoding_dir")
    aargs = ap.parse_args(argv)
    # Log file is named after this script, e.g. ".generate-semantic-model.log".
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0], aargs.verbose, False, True, True)
    logging.debug(aargs)

    lstm = sequential.load_model(aargs.data_dir, aargs.sequential_dir, True)
    user_log.info("Sem")
    hyper_parameters = model.HyperParameters(aargs.layers, aargs.width)
    extra = {
        "word_input": aargs.word_input,
        "monolith": aargs.monolith,
    }

    # Either reuse a previously generated encoding or train a fresh one.
    if aargs.pre_existing:
        sem = load_sem(lstm, aargs.encoding_dir)
    else:
        sem = generate_sem(lstm, hyper_parameters, extra, aargs.states_dir, aargs.arc_epochs, aargs.encoding_dir, aargs.key_set, aargs.initial_decays, aargs.convergence_decays)

    # keys_sem: per-key perplexities; total_sem: overall perplexity.
    keys_sem, total_sem = test_model(lstm, sem, aargs.states_dir, False, aargs.key_set)
    # TODO
    #user_log.info("Baseline")
    #baseline = generate_baseline(aargs.data_dir, lstm, hyper_parameters, extra)
    #scores_baseline, totals_baseline = test_model(lstm, baseline, aargs.states_dir, True, aargs.key_set)

    # Per-key breakdown csv.
    with open(os.path.join(aargs.encoding_dir, "analysis-breakdown.csv"), "w") as fh:
        writer = csv_writer(fh)
        writer.writerow(["technique", "key", "perplexity"])
        for key, perplexity in sorted(keys_sem.items()):
            writer.writerow(["sem", key, "%f" % perplexity])
        #for key, scores in sorted(scores_baseline.items()):
        #    for name, score in sorted(scores.items()):
        #        writer.writerow(["baseline", key, name, "%f" % score])

    # Overall totals csv.
    with open(os.path.join(aargs.encoding_dir, "analysis-totals.csv"), "w") as fh:
        writer = csv_writer(fh)
        writer.writerow(["technique", "perplexity"])
        writer.writerow(["sem", "%f" % total_sem])
        #for name, score in sorted(totals_baseline.items()):
        #    writer.writerow(["baseline", name, "%f" % score])
    return 0
def main(argv):
    # Extract per-run dev/test perplexity series from a training log and
    # write them to stdout as a csv: one row per epoch, two columns
    # (Dev, Test) per invocation found in the log.  Returns 0 on success.
    ap = ArgumentParser(prog="extract-perplexity")
    ap.add_argument("log_file")
    aargs = ap.parse_args(argv)

    # name -> {epoch -> (validation, test-or-None)}
    name_series = {}
    maximum_epoch = 0
    with open(aargs.log_file, "r") as fh:
        # Parser state for the run currently being scanned.
        epoch = None
        arc_epochs = None          # epochs accumulated since the last save
        stored_validation = None   # most recent validation score awaiting a save
        stored_name = None         # name of the run being parsed (None = idle)
        for line in fh.readlines():
            # A new invocation line starts a fresh series.
            name = matched_invocation(line)
            if name is not None:
                epoch = 0
                arc_epochs = 0
                stored_name = name
                name_series[stored_name] = {}
            if stored_name is not None:
                if matches_epoch(line):
                    arc_epochs += 1
                if matches_load(line):
                    # A model reload discards un-saved progress.
                    arc_epochs = 0
                    stored_validation = None
                if matches_save(line):
                    # A save commits the accumulated epochs and the pending
                    # validation score (if any) at the new epoch count.
                    epoch += arc_epochs
                    arc_epochs = 0
                    if epoch > maximum_epoch:
                        maximum_epoch = epoch
                    if stored_validation is not None:
                        name_series[stored_name][epoch] = (stored_validation, None)
                        stored_validation = None
                validation = matched_validation(line)
                if validation is not None:
                    stored_validation = validation
                test = matched_test(line)
                if test is not None:
                    # Attach the test score to the current epoch's entry.
                    name_series[stored_name][epoch] = (
                        name_series[stored_name][epoch][0], test)
                total = matched_total(line)
                if total is not None:
                    # NOTE(review): a "total" line falls back as the test
                    # score when none was recorded, and appears to mark the
                    # end of the run (stored_name reset) -- confirm against
                    # the log format.
                    if name_series[stored_name][epoch][1] is None:
                        name_series[stored_name][epoch] = (
                            name_series[stored_name][epoch][0], total)
                    stored_name = None

    # Two columns (Dev, Test) per run name, sorted for stable output.
    header = ["epoch"]
    for name, series in sorted(name_series.items()):
        header += ["%s - Dev" % name, "%s - Test" % name]

    # Only emit epochs for which at least one run has data.
    row_columns = []
    for epoch in range(maximum_epoch):
        if any([epoch in series for name, series in name_series.items()]):
            row = [epoch]
            for name, series in sorted(name_series.items()):
                if epoch in series:
                    values = series[epoch]
                    row += [
                        "%.4f" % values[0],
                        "" if values[1] is None else "%.4f" % values[1]
                    ]
                else:
                    row += ["", ""]
            row_columns += [row]

    writer = csv_writer(sys.stdout)
    writer.writerow(header)
    for row in row_columns:
        writer.writerow(row)
    return 0
# Pipeline stage: compute the average qualifying GFP per segmented cell
# region, padding the per-region averages back onto the image grid.
average_GFP = wf.average_qualifying_value_per_region(qualifying_GFP,
                                                     in_channel=['pre_cell_labels',
                                                                 'projected_GFP',
                                                                 'qualifying_GFP'],
                                                     out_channel=['average_GFP',
                                                                  'average_GFP_pad'])

# Final stage: render verification figures and append analysis rows to
# xi_analys_results.csv (see xi_pre_render); this builds a lazy generator.
pre_render = examples.xi_support.xi_pre_render(average_GFP,
                                               in_channel=['name pattern', 'projected_GFP',
                                                           'qualifying_GFP', 'pre_cell_labels',
                                                           'average_GFP_pad', 'projected_mCh',
                                                           'mCherry', 'GFP', 'group id'],
                                               out_channel='_',
                                               save=True)

# since the resolution for the mitochondria is so much lower when compared to yeast cells, we will
# have to perform a bit more straightforward of cutting and

# Write the csv header once up front; the render stage appends one result
# row per analysed image.  NOTE(review): Python 2 -- binary-mode csv and
# print statements below.
with open('xi_analys_results.csv', 'wb') as output_file:
    writer = csv_writer(output_file)
    writer.writerow(['file', 'time', 'correlation coeff', 'median GFP', 'average GFP',
                     'linreg slope', 'linreg rvalue', 'linreg pvalue'])

prev_time = time()

# Driving the generator performs the actual analysis; log per-item timing.
for primary_namespace in pre_render:
    print '%s - analyzed %s - %s in %s' % (strftime('%X %x'),
                                           primary_namespace['name pattern'],
                                           primary_namespace['group id'],
                                           time() - prev_time)
    prev_time = time()
def xi_pre_render(name_pattern, proj_gfp, qual_gfp, cell_labels, average_gfp_pad,
                  proj_mch, mch, gfp, timestamp,
                  save=False, directory_to_save_to='verification',
                  mch_cutoff=0.2, slector_cutoff=0.1):
    # Render a 2x3 verification figure (GFP, log-GFP, segmentation,
    # labels, mCh/GFP correlation, mCherry) for one image set and append a
    # row of correlation/regression statistics to xi_analys_results.csv.
    # NOTE(review): 'average_gfp_pad' is accepted but unused here; csv is
    # opened in 'ab' (Python 2 binary append).
    plt.figure(figsize=(20, 15))
    plt.suptitle(name_pattern)

    # Panels share axes with the first (main) panel for synchronised zoom.
    main_ax = plt.subplot(231)
    plt.title('GFP')
    plt.imshow(proj_gfp, interpolation='nearest')
    plt.contour(cell_labels > 0, [0.5], colors='w')

    plt.subplot(232, sharex=main_ax, sharey=main_ax)
    plt.title('log-GFP')
    # Offset by the smallest positive value so log() never sees zero.
    plt.imshow(np.log(proj_gfp + np.min(proj_gfp[proj_gfp > 0])),
               cmap='hot', interpolation='nearest')
    plt.contour(cell_labels > 0, [0.5], colors='w')

    plt.subplot(233, sharex=main_ax, sharey=main_ax)
    plt.title('raw segmentation')
    plt.imshow(qual_gfp, cmap='gray', interpolation='nearest')
    plt.contour(cell_labels > 0, [0.5], colors='w')

    ax = plt.subplot(234, sharex=main_ax, sharey=main_ax)
    plt.title('labeled segmentation')
    plt.imshow(cell_labels, cmap=plt.cm.spectral, interpolation='nearest')
    # Annotate each labelled region with its id at its centre of mass.
    unique = np.unique(cell_labels)
    for i in unique:
        mask = cell_labels == i
        x, y = scipy.ndimage.measurements.center_of_mass(mask)
        ax.text(y-8, x+8, '%s' % i, fontsize=10)

    plt.subplot(235)
    # Correlate mCherry vs GFP over pixels where both exceed the selector
    # cutoff; fit a regression line for the summary statistics.
    selector = np.logical_and(mch > slector_cutoff, gfp > slector_cutoff)
    plt.title('mCh-GFP correlation - %s, qual GFP intensity: %s' %
              (np.corrcoef(mch[selector], gfp[selector])[0, 1],
               np.median(gfp[mch > mch_cutoff])))
    slope, intercept, rvalue, pvalue, stderr = linregress(mch[selector], gfp[selector])
    better2D_desisty_plot(mch[selector], gfp[selector])
    linarray = np.arange(0.1, 0.5, 0.05)
    plt.plot(linarray, intercept+slope*linarray, 'r')
    plt.xlabel('mCherry')
    plt.ylabel('GFP')

    plt.subplot(236, sharex=main_ax, sharey=main_ax)
    plt.title('mCherry')
    plt.imshow(proj_mch, interpolation='nearest')
    plt.contour(cell_labels > 0, [0.5], colors='w')

    # Append this image's statistics; the header row is written once by
    # the calling script.
    with open('xi_analys_results.csv', 'ab') as output_file:
        writer = csv_writer(output_file)
        puck = [name_pattern,
                timestamp,
                np.corrcoef(mch[selector], gfp[selector])[0, 1],
                np.median(gfp[mch > mch_cutoff]),
                np.average(gfp[mch > mch_cutoff]),
                slope, rvalue, pvalue]
        writer.writerow(puck)

    if not save:
        plt.show()
    else:
        name_puck = directory_to_save_to+'/'+'xi_pre_render-'+timestamp+'-'+name_pattern+'.png'
        plt.savefig(name_puck)
        plt.close()
def write_flows_to_csv(self, path_to_output_file, river_index=None,
                       river_id=None, date_search_start=None,
                       date_search_end=None, daily=False, mode="mean"):
    """
    Write out RAPID output to CSV file.

    .. note:: Need either *river_id* or *river_index* parameter,
              but either can be used.

    Parameters:
        path_to_output_file(str): Path to the output csv file.
        river_index(Optional[int]): Index of the river in the file
            you want the streamflow for.
        river_id(Optional[int]): River ID that you want the
            streamflow for.
        date_search_start(Optional[datetime]): Datetime object with
            the minimum date for starting.
        date_search_end(Optional[datetime]): Datetime object with
            the maximum date for ending.
        daily(Optional[bool]): If True and the file is CF-Compliant,
            write out daily flows.
        mode(Optional[str]): Daily aggregation, either "mean" (default)
            or "max".

    Example writing entire time series to file:

    .. code:: python

        from RAPIDpy import RAPIDDataset

        river_id = 3624735
        path_to_rapid_qout = '/path/to/Qout.nc'
        with RAPIDDataset(path_to_rapid_qout) as qout_nc:
            qout_nc.write_flows_to_csv('/timeseries/Qout_3624735.csv',
                                       river_id=river_id)

            # if the file is CF compliant, you can write daily averages;
            # getting the river index is just an example of the alternative
            river_index = qout_nc.get_river_index(river_id)
            qout_nc.write_flows_to_csv('/timeseries/Qout_daily.csv',
                                       river_index=river_index,
                                       daily=True)

            # CF-compliant files can also be filtered by date
            qout_nc.write_flows_to_csv(
                '/timeseries/Qout_daily_date_filter.csv',
                river_id=river_id,
                daily=True,
                date_search_start=datetime(2002, 8, 31),
                date_search_end=datetime(2002, 9, 15),
                mode="max")
    """
    # FIX: identity comparisons with None instead of ==/!=
    if river_id is not None:
        river_index = self.get_river_index(river_id)
    elif river_id is None and river_index is None:
        raise Exception("ERROR: Need reach id or reach index ...")

    # analyze and write
    if self.is_time_variable_valid() or self._is_legacy_time_valid():
        time_index_range = self.get_time_index_range(
            date_search_start=date_search_start,
            date_search_end=date_search_end)

        qout_arr = self.get_qout_index(river_index,
                                       time_index_array=time_index_range)
        time_array = self.get_time_array(time_index_array=time_index_range,
                                         return_datetime=True)
        df = pd.DataFrame(qout_arr.T, index=time_array)
        if daily:
            df = df.resample('D')
            if mode == "mean":
                df = df.mean()
            elif mode == "max":
                df = df.max()
            else:
                raise Exception("Invalid mode ...")
        df.to_csv(path_to_output_file, header=False)
    else:
        print("Valid time variable not found. Printing values only ...")
        qout_arr = self.get_qout_index(river_index)
        with open_csv(path_to_output_file, 'w') as outcsv:
            writer = csv_writer(outcsv)
            # FIX: enumerate replaces Python-2-only xrange indexing
            for index, qout in enumerate(qout_arr):
                writer.writerow([index, "{0:.5f}".format(qout)])
def CreateMuskingumKfacFile(in_drainage_line,
                            river_id,
                            length_id,
                            slope_id,
                            celerity,
                            formula_type,
                            in_connectivity_file,
                            out_kfac_file,
                            length_units="km",
                            slope_percentage=False,
                            file_geodatabase=None):
    r"""
    Creates the Kfac file for calibration.

    The improved methods using slope to generate values
    for Kfac were used here:

    Tavakoly, A. A., A. D. Snow, C. H. David, M. L. Follum, D. R. Maidment,
    and Z.-L. Yang, (2016) "Continental-Scale River Flow Modeling of the
    Mississippi River Basin Using High-Resolution NHDPlus Dataset",
    Journal of the American Water Resources Association (JAWRA) 1-22.
    DOI: 10.1111/1752-1688.12456

    Formula Type Options:

    1. :math:`Kfac_n = \frac{RiverLength_n}{Celerity_n}`
    2. :math:`Kfac_n = \eta*\frac{RiverLength_n}{\sqrt{RiverSlope_n}}`
    3. :math:`Kfac_n = \eta*\frac{RiverLength_n}{\sqrt{RiverSlope_n}}\left[0.05, 0.95\right]`

    Where:

    :math:`a = \frac{\sum_{n=1}^{r} \frac{RiverLength_n}{Celerity_n}}{r}`

    :math:`b = \frac{\sum_{n=1}^{r} \frac{RiverLength_n}{\sqrt{RiverSlope_n}}}{r}`

    :math:`\eta = \frac{a}{b}`

    r = Number of river segments.

    Parameters
    ----------
    in_drainage_line: str
        Path to the stream network (i.e. Drainage Line) shapefile.
    river_id: str
        The name of the field with the river ID
        (Ex. 'HydroID', 'COMID', or 'LINKNO').
    length_id: str
        The field name containging the length of the river segment
        (Ex. 'LENGTHKM' or 'Length').
    slope_id: str
        The field name containging the slope of the river segment
        (Ex. 'Avg_Slope' or 'Slope').
    celerity: float
        The flow wave celerity for the watershed in meters per second.
        1 km/hr or 1000.0/3600.0 m/s is a reasonable value if unknown.
    formula_type: int
        An integer representing the formula type to use when calculating
        kfac.
    in_connectivity_file: str
        The path to the RAPID connectivity file.
    out_kfac_file: str
        The path to the output kfac file.
    length_units: str, optional
        The units for the length_id field. Supported types are "m" for
        meters and "km" for kilometers.
    slope_percentage: bool, optional
        If True, it assumes the slope given is in percentage and will
        divide by 100. Default is False.
    file_geodatabase: str, optional
        Path to the file geodatabase. If you use this option,
        in_drainage_line is the name of the stream network feature class
        (WARNING: Not always stable with GDAL).

    Example::

        from RAPIDpy.gis.muskingum import CreateMuskingumKfacFile

        CreateMuskingumKfacFile(
            in_drainage_line='/path/to/drainageline.shp',
            river_id='LINKNO',
            length_id='Length',
            slope_id='Slope',
            celerity=1000.0/3600.0,
            formula_type=3,
            in_connectivity_file='/path/to/rapid_connect.csv',
            out_kfac_file='/path/to/kfac.csv',
            length_units="m",
        )
    """  # noqa
    # read river ID, length and slope for every feature in the network
    ogr_drainage_line_shapefile_lyr, ogr_drainage_line_shapefile = \
        open_shapefile(in_drainage_line, file_geodatabase)

    number_of_features = ogr_drainage_line_shapefile_lyr.GetFeatureCount()
    river_id_list = np.zeros(number_of_features, dtype=np.int32)
    length_list = \
        np.zeros(number_of_features, dtype=np.float32)
    slope_list = np.zeros(number_of_features, dtype=np.float32)
    for feature_idx, drainage_line_feature in \
            enumerate(ogr_drainage_line_shapefile_lyr):
        river_id_list[feature_idx] = drainage_line_feature.GetField(river_id)
        length = drainage_line_feature.GetField(length_id)
        if length is not None:
            length_list[feature_idx] = length
        slope = drainage_line_feature.GetField(slope_id)
        if slope is not None:
            slope_list[feature_idx] = slope

    del ogr_drainage_line_shapefile

    if slope_percentage:
        # slope given as percent -> convert to fraction
        slope_list /= 100.0

    if length_units == "m":
        # normalize to km; lengths are rescaled to meters again below
        length_list /= 1000.0
    elif length_units != "km":
        raise Exception("Invalid length units supplied. "
                        "Supported units are m and km.")

    connectivity_table = np.loadtxt(in_connectivity_file,
                                    delimiter=",",
                                    ndmin=2,
                                    dtype=int)

    length_slope_array = []
    kfac2_array = []
    if formula_type == 1:
        log("River Length/Celerity")
    elif formula_type == 2:
        log("Eta*River Length/Sqrt(River Slope)")
    elif formula_type == 3:
        log("Eta*River Length/Sqrt(River Slope) [0.05, 0.95]")
    else:
        raise Exception("Invalid formula type. Valid range: 1-3 ...")

    with open_csv(out_kfac_file, 'w') as kfacfile:
        kfac_writer = csv_writer(kfacfile)
        for row in connectivity_table:
            stream_id = int(float(row[0]))
            stream_id_index = river_id_list == stream_id
            # find the length (km -> m)
            stream_length = length_list[stream_id_index] * 1000.0

            if formula_type >= 2:
                # find the slope
                stream_slope = slope_list[stream_id_index]

                if stream_slope <= 0:
                    # if no slope, take average of upstream
                    # and downstream to get it
                    next_down_id = int(float(row[1]))
                    next_down_slope = 0
                    try:
                        next_down_index = \
                            np.where(river_id_list == next_down_id)[0][0]
                        next_down_slope = slope_list[next_down_index]
                    except IndexError:
                        # downstream reach not in this network
                        pass

                    next_up_id = int(float(row[3]))
                    next_up_slope = 0
                    try:
                        next_up_index = \
                            np.where(river_id_list == next_up_id)[0][0]
                        next_up_slope = slope_list[next_up_index]
                    except IndexError:
                        # upstream reach not in this network
                        pass

                    stream_slope = (next_down_slope + next_up_slope) / 2.0
                    if stream_slope <= 0:
                        # if still no slope, set to 0.001
                        stream_slope = 0.001

                length_slope_array.append(stream_length / stream_slope**0.5)
                kfac2_array.append(stream_length / celerity)
            else:
                # formula 1: write kfac directly, no eta scaling
                kfac = stream_length / celerity
                kfac_writer.writerow(kfac)

        if formula_type >= 2:
            if formula_type == 3:
                # clamp outliers to the 5th/95th percentile band
                log("Filtering Data by 5th and 95th Percentiles ...")
                length_slope_array = np.array(length_slope_array)
                percentile_5 = np.percentile(length_slope_array, 5)
                percentile_95 = np.percentile(length_slope_array, 95)

                length_slope_array[length_slope_array < percentile_5] = \
                    percentile_5
                length_slope_array[length_slope_array > percentile_95] = \
                    percentile_95

            # eta rescales slope-based kfac to match the celerity-based mean
            eta = np.mean(kfac2_array) / np.mean(length_slope_array)
            log("Kfac2_Avg {0}".format(np.mean(kfac2_array)))
            log("Length_Slope Avg {0}".format(np.mean(length_slope_array)))
            log("Eta {0}".format(eta))

            log("Writing Data ...")
            for len_slope in length_slope_array:
                kfac_writer.writerow(eta * len_slope)
def CreateMuskingumKfacFile(in_drainage_line, river_id, length_id, slope_id,
                            celerity, formula_type, in_connectivity_file,
                            out_kfac_file, length_units="km",
                            slope_percentage=False, file_geodatabase=None):
    """
    Creates the Kfac file for calibration.

    Formula Type Options:
    1. River Length/Celerity;
    2. Eta*River Length/Sqrt(River Slope); and
    3. Eta*River Length/Sqrt(River Slope) [0.05, 0.95]

    Where Eta = Average(River Length/Co of all rivers) /
    Average(River Length/Sqrt(River Slope) of all rivers)

    Args:
        in_drainage_line(str): Path to the stream network
            (i.e. Drainage Line) shapefile.
        river_id(str): The name of the field with the river ID
            (Ex. 'HydroID', 'COMID', or 'LINKNO').
        length_id(str): The field name containging the length of the
            river segment (Ex. 'LENGTHKM' or 'Length').
        slope_id(str): The field name containging the slope of the
            river segment (Ex. 'Avg_Slope' or 'Slope').
        celerity(float): The flow wave celerity for the watershed in
            meters per second. 1 km/hr or 1000.0/3600.0 m/s is a
            reasonable value if unknown.
        formula_type(int): An integer representing the formula type to
            use when calculating kfac.
        in_connectivity_file(str): The path to the RAPID connectivity
            file.
        out_kfac_file(str): The path to the output kfac file.
        length_units(Optional[str]): The units for the length_id field.
            Supported types are "m" for meters and "km" for kilometers.
        slope_percentage(Optional[bool]): If True, it assumes the slope
            given is in percentage and will divide by 100.
            Default is False.
        file_geodatabase(Optional[str]): Path to the file geodatabase.
            If you use this option, in_drainage_line is the name of the
            stream network feature class.
            (WARNING: Not always stable with GDAL.)

    Example::

        from RAPIDpy.gis.muskingum import CreateMuskingumKfacFile

        if __name__ == "__main__":
            CreateMuskingumKfacFile(
                in_drainage_line='/path/to/drainageline.shp',
                river_id='LINKNO',
                length_id='Length',
                slope_id='Slope',
                celerity=1000.0/3600.0,
                formula_type=3,
                in_connectivity_file='/path/to/rapid_connect.csv',
                out_kfac_file='/path/to/kfac.csv',
                length_units="m",
            )
    """
    # open the network either from a file geodatabase or a plain shapefile
    if file_geodatabase:
        gdb_driver = ogr.GetDriverByName("OpenFileGDB")
        ogr_file_geodatabase = gdb_driver.Open(file_geodatabase)
        ogr_drainage_line_shapefile_lyr = ogr_file_geodatabase.GetLayer(
            in_drainage_line)
    else:
        ogr_drainage_line_shapefile = ogr.Open(in_drainage_line)
        ogr_drainage_line_shapefile_lyr = ogr_drainage_line_shapefile.GetLayer(
        )

    # read river ID, length and slope for every feature
    number_of_features = ogr_drainage_line_shapefile_lyr.GetFeatureCount()
    river_id_list = np.zeros(number_of_features, dtype=np.int32)
    length_list = np.zeros(number_of_features, dtype=np.float32)
    slope_list = np.zeros(number_of_features, dtype=np.float32)
    for feature_idx, drainage_line_feature in enumerate(
            ogr_drainage_line_shapefile_lyr):
        river_id_list[feature_idx] = drainage_line_feature.GetField(river_id)
        length = drainage_line_feature.GetField(length_id)
        if length is not None:
            length_list[feature_idx] = length
        slope = drainage_line_feature.GetField(slope_id)
        if slope is not None:
            slope_list[feature_idx] = slope

    if slope_percentage:
        # slope given as percent -> convert to fraction
        slope_list /= 100.0

    if length_units == "m":
        # normalize to km; lengths are rescaled to meters again below
        length_list /= 1000.0
    elif length_units != "km":
        raise Exception(
            "ERROR: Invalid length units supplied. Supported units are m and km."
        )

    connectivity_table = np.loadtxt(in_connectivity_file,
                                    delimiter=",", ndmin=2, dtype=int)

    length_slope_array = []
    kfac2_array = []
    if formula_type == 1:
        print("River Length/Celerity")
    elif formula_type == 2:
        print("Eta*River Length/Sqrt(River Slope)")
    elif formula_type == 3:
        print("Eta*River Length/Sqrt(River Slope) [0.05, 0.95]")
    else:
        raise Exception("Invalid formula type. Valid range: 1-3 ...")

    with open_csv(out_kfac_file, 'w') as kfacfile:
        kfac_writer = csv_writer(kfacfile)
        for row in connectivity_table:
            streamID = int(float(row[0]))
            streamIDindex = river_id_list == streamID
            # find the length (km -> m)
            stream_length = length_list[streamIDindex] * 1000.0

            if formula_type >= 2:
                # find the slope
                stream_slope = slope_list[streamIDindex]

                if stream_slope <= 0:
                    # if no slope, take average of upstream and downstream
                    # to get it
                    nextDownID = int(float(row[1]))
                    next_down_slope = 0
                    try:
                        next_down_index = np.where(
                            river_id_list == nextDownID)[0][0]
                        next_down_slope = slope_list[next_down_index]
                    except IndexError:
                        # downstream reach not in this network
                        pass
                    nextUpID = int(float(row[3]))
                    next_up_slope = 0
                    try:
                        next_up_index = np.where(
                            river_id_list == nextUpID)[0][0]
                        next_up_slope = slope_list[next_up_index]
                    except IndexError:
                        # upstream reach not in this network
                        pass
                    stream_slope = (next_down_slope + next_up_slope) / 2.0
                    if stream_slope <= 0:
                        # if still no slope, set to 0.001
                        stream_slope = 0.001

                length_slope_array.append(stream_length / stream_slope**0.5)
                kfac2_array.append(stream_length / celerity)
            else:
                # formula 1: write kfac directly, no eta scaling
                kfac = stream_length / celerity
                kfac_writer.writerow(kfac)

        if formula_type >= 2:
            if formula_type == 3:
                # clamp outliers to the 5th/95th percentile band
                print("Filtering Data by 5th and 95th Percentiles ...")
                length_slope_array = np.array(length_slope_array)
                percentile_5 = np.percentile(length_slope_array, 5)
                percentile_95 = np.percentile(length_slope_array, 95)
                length_slope_array[
                    length_slope_array < percentile_5] = percentile_5
                length_slope_array[
                    length_slope_array > percentile_95] = percentile_95

            # eta rescales slope-based kfac to match the celerity-based mean
            eta = np.mean(kfac2_array) / np.mean(length_slope_array)
            print("Kfac2_Avg {0}".format(np.mean(kfac2_array)))
            print("Length_Slope Avg {0}".format(np.mean(length_slope_array)))
            print("Eta {0}".format(eta))
            print("Writing Data ...")
            for len_slope in length_slope_array:
                kfac_writer.writerow(eta * len_slope)
clean_dept = departments.clean(current_dept) new_rows.append( [clean_dept, class_match['id'], class_match['name'], total]) elif label == 'Total': # When 'Total' row is reached, reset current_dept current_dept = '' else: # Some departments span multiple lines if current_dept: current_dept = current_dept + ' ' + label.strip() else: current_dept = label.strip() # Add year and fund columns to each row new_rows = [['2017', 'General Fund'] + row for row in new_rows] # Group rows by everything but total and aggregate the total (sum) new_rows = aggregate_similar_rows(new_rows, 5) # Sort rows for idempotency new_rows.sort() header = ['Fiscal Year', 'Fund', 'Department', 'Class ID', 'Class', 'Total'] with open(OUTPUT_FILE_PATH, 'wb') as f: writer = csv_writer(f) writer.writerow(header) writer.writerows(new_rows) print('Wrote {0} rows to {1}'.format(len(new_rows), OUTPUT_FILE_PATH))
def run( dir_MRI="data/ALFA_PET", dir_PET="data/ALFA_PET", dir_RR="data/Atlas/CL_2mm", outfile="data/ALFA_PET/Quant_realigned.csv", glob_PET="*_PET.nii.gz", glob_MRI="*_MRI.nii.gz", ): """ Args: dir_MRI (str or Path): MRI directory dir_PET (str or Path): PET directory dir_RR (str or Path): Reference regions ROIs directory (standard Centiloid RR from GAAIN Centioid website: 2mm, nifti) outfile (str or Path): Output quantification file Returns: fname (list[str]) greyCerebellum (list[float]) wholeCerebellum (list[float]) wholeCerebellumBrainStem (list[float]) pons (list[float]) """ # PET & MR images lists s_PET_dir = list(tmap(gunzip, Path(dir_PET).glob(glob_PET), leave=False)) s_MRI_dir = list(tmap(gunzip, Path(dir_MRI).glob(glob_MRI), leave=False)) if len(s_PET_dir) != len(s_MRI_dir): raise IndexError("Different number of PET and MR images") eng = get_matlab() dir_spm = fspath(Path(eng.which("spm")).parent) for d_PET, d_MRI in tzip(s_PET_dir, s_MRI_dir): with tic("Step 0: Reorient PET subject"): eng.f_acpcReorientation(d_PET, nargout=0) with tic("Step 0: Reorient MRI subject"): eng.f_acpcReorientation(d_MRI, nargout=0) with tic("Step 1: CorregisterEstimate"): eng.f_1CorregisterEstimate(d_MRI, dir_spm, nargout=0) # Check Reg with tic("Step 2: CorregisterEstimate"): eng.f_2CorregisterEstimate(d_MRI, d_PET, nargout=0) # Check Reg with tic("Step 3: Segment"): eng.f_3Segment(d_MRI, dir_spm, nargout=0) with tic("Step 4: Normalise"): d_file_norm = fspath( Path(d_MRI).parent / ("y_" + Path(d_MRI).name)) eng.f_4Normalise(d_file_norm, d_MRI, d_PET, nargout=0) s_PET = list( map( fspath, Path(dir_PET).glob("w" + (glob_PET[:-3] if glob_PET.lower(). endswith(".gz") else glob_PET)))) res = eng.f_Quant_centiloid(s_PET, fspath(dir_RR), nargout=5) if outfile: with open(outfile, "w") as fd: f = csv_writer(fd) f.writerow(("Fname", "GreyCerebellum", "WholeCerebellum", "WholeCerebellumBrainStem", "Pons")) f.writerows(zip(*res)) return res
def check_logfile(fname, write_csv=False, nback=0, write_datetime=False):
    """
    Run over a Cheetah logfile and analyze reference settings etc.

    Prints a summary of every recording setting found; optionally writes
    the reference report of the setting `nback` from the end to a csv,
    and/or writes guessed start/stop datetimes to DATE_FNAME.
    """
    _, protocol, _ = parser(fname)
    base_name = os.path.splitext(os.path.basename(fname))[0]
    all_settings = analyze_drs(protocol)

    # print a short report for every recording setting in the log
    for i_setting, setting in enumerate(all_settings):
        print()
        if setting.folder is None:
            msg = 'Warning: Recording Stop -> Start without folder change!'
        else:
            msg = setting.folder
        print('Start: {} ({})'.format(setting.start_rec[1],
                                      setting.start_timestamp))
        print('Stop: {} ({})'.format(setting.stop_rec[1],
                                     setting.stop_timestamp))
        # print('Duration: {} min'.
        #       format((setting.stop_rec[1] - setting.start_rec[1])))
        out_str = create_rep(setting.num2name, setting.name2num,
                             setting.crefs, setting.lrefs, setting.grefs)

    if write_csv:
        # select the setting `nback` positions from the end of the log
        setting = all_settings[-nback - 1]
        if setting.folder is None:
            msg = 'Warning: Recording Stop -> Start without folder change!'
        else:
            msg = setting.folder
        out_str = create_rep(setting.num2name, setting.name2num,
                             setting.crefs, setting.lrefs, setting.grefs)
        outfname = base_name + '_{:02d}.csv'.\
            format(len(all_settings) - nback - 1)
        with open(outfname, 'w') as outf:
            # header comment: folder plus start/stop record info
            outf.write('# {} {} {}\n'.format(msg, setting.start_rec[1],
                                             setting.stop_rec[1]))
            csvwriter = csv_writer(outf)
            for line in out_str:
                csvwriter.writerow(line)

    if write_datetime:
        setting = all_settings[-nback - 1]
        date, start, stop = parse_times(setting)
        print(date, start, stop)
        if date is None:
            out = '# Date not guessed because Recording was stopped'\
                  ' and re-started without folder change!\n'
        else:
            out = '# {}\ncreate_folder {}\n'.\
                format(setting.folder, date.strftime('%Y-%m-%d %H:%M:%S'))
            start_ts = setting.start_timestamp
            stop_ts = setting.stop_timestamp
            for name, d, t in (('start', start, start_ts),
                               ('stop', stop, stop_ts)):
                out += name + '_recording {} {} {}\n'.\
                    format(d.date().isoformat(), d.time().isoformat(), t)
            # timestamps are in microseconds; compare against wall clock
            diff_time = (stop_ts - start_ts) / 1e6 - (stop - start).seconds
            out += 'cheetah_ahead: {}\n'.format(diff_time)
        if os.path.exists(DATE_FNAME):
            print('{} exists, not overwriting!'.format(DATE_FNAME))
        else:
            with open(DATE_FNAME, 'w') as fid:
                fid.write(out)
def akshay_summarize(name_pattern, group_by, av_nuc_p53, av_en_p53,
                     av_nuc_p21, av_en_p21, output):
    """
    Append one summary row per index to the csv file at `output`.

    Each row is: [name_pattern, group_by, index, nuclear p53,
    enhanced p53, nuclear p21, enhanced p21], zipping the four
    per-index measurement sequences together.

    :param name_pattern: image/sample identifier written on every row
    :param group_by: grouping key written on every row
    :param av_nuc_p53: per-index average nuclear p53 values
    :param av_en_p53: per-index average enhanced p53 values
    :param av_nuc_p21: per-index average nuclear p21 values
    :param av_en_p21: per-index average enhanced p21 values
    :param output: path of the csv file to append to
    """
    # FIX: text append mode with newline='' — csv.writer requires a
    # text-mode file under Python 3; the original 'ab' raises TypeError.
    with open(output, 'a', newline='') as output_file:
        writer = csv_writer(output_file)
        for i, nuc_pac in enumerate(zip(av_nuc_p53, av_en_p53,
                                        av_nuc_p21, av_en_p21)):
            writer.writerow([name_pattern, group_by, i,
                             nuc_pac[0], nuc_pac[1],
                             nuc_pac[2], nuc_pac[3]])
def _get_default_csv(self, open_file):
    """Build a csv writer over `open_file` using this object's params.

    The csv module's default quoting (csv.QUOTE_MINIMAL) applies unless
    overridden by an entry in self.csv_params.
    """
    writer_options = self.csv_params
    return csv_writer(open_file, **writer_options)
def Kristen_render(name_pattern, group_id, mCherry, extranuclear_mCherry_pad,
                   GFP_orig, mCherry_orig, output, save=False,
                   directory_to_save_to='verification'):
    """
    Segment cells from the extranuclear mCherry mask, compute per-cell
    GFP/mCherry statistics and a GFP->mCherry linear regression for each
    qualifying cell, append the stats to the tab-separated file `output`,
    and render per-cell plus summary verification figures.

    Returns a list of (slope, r-value, p-value) tuples, one per
    qualifying cell.
    """
    # label connected extranuclear mCherry regions; each label = one cell
    labels, _ = ndi.label(extranuclear_mCherry_pad)
    unique_segmented_cells_labels = np.unique(labels)[1:]  # drop background
    mCherry_cutoff = np.zeros_like(mCherry)
    qualifying_cell_label = []
    qualifying_regression_stats = []

    for cell_label in unique_segmented_cells_labels:
        mCherry_2 = np.zeros_like(mCherry)
        my_mask = labels == cell_label
        average_apply_mask = np.mean(mCherry[my_mask])
        intensity = np.sum(mCherry[my_mask])
        binary_pad = np.zeros_like(mCherry)
        binary_pad[my_mask] = 1
        pixel = np.sum(binary_pad[my_mask])

        # keep only sufficiently bright (mean or total) and large cells
        if (average_apply_mask > .05 or intensity > 300) and pixel > 4000:
            GFP_limited_to_cell_mask = cf._3d_stack_2d_filter(GFP_orig,
                                                              my_mask)
            mCherry_limited_to_cell_mask = cf._3d_stack_2d_filter(
                mCherry_orig, my_mask)

            # voxels whose mCherry exceeds 50 count as "qualifying"
            qualifying_3d_GFP = GFP_limited_to_cell_mask[
                mCherry_limited_to_cell_mask > 50]
            average_3d_GFP = np.mean(qualifying_3d_GFP)
            median_3d_GFP = np.median(qualifying_3d_GFP)
            std_3d_GFP = np.std(qualifying_3d_GFP)
            sum_qualifying_GFP = np.sum(qualifying_3d_GFP)

            nonqualifying_3d_GFP = GFP_limited_to_cell_mask[
                mCherry_limited_to_cell_mask <= 50]
            average_nonqualifying_3d_GFP = np.mean(nonqualifying_3d_GFP)
            median_nonqualifying_3d_GFP = np.median(nonqualifying_3d_GFP)
            std_nonqualifying_3d_GFP = np.std(nonqualifying_3d_GFP)
            sum_nonqualifying_GFP = np.sum(nonqualifying_3d_GFP)

            sum_total_GFP = sum_qualifying_GFP + sum_nonqualifying_GFP
            percent_qualifying_over_total_GFP = \
                sum_qualifying_GFP/sum_total_GFP
            # report the percentage too or sums are sufficient?

            GFP_orig_qualifying = cf._3d_stack_2d_filter(GFP_orig, my_mask)
            mCherry_orig_qualifying = cf._3d_stack_2d_filter(mCherry_orig,
                                                             my_mask)
            mCherry_1d = mCherry_orig_qualifying[
                mCherry_orig_qualifying > 50]
            GFP_1d = GFP_orig_qualifying[mCherry_orig_qualifying > 50]
            regression_results = stats.linregress(GFP_1d, mCherry_1d)

            mCherry_2[my_mask] = mCherry[my_mask]
            mCherry_cutoff[my_mask] = mCherry[my_mask]
            qualifying_cell_label.append(cell_label)
            # (slope, r-value, p-value) from the linregress result
            qualifying_regression_stats.append((regression_results[0],
                                                regression_results[2],
                                                regression_results[3]))

            # name pattern: "transfection - cell type - time - ? - image #"
            name_pattern_split = name_pattern.split(' - ')
            transfection_label = name_pattern_split[0]
            cell_type = name_pattern_split[1]
            exp_time = name_pattern_split[2]
            image_number = name_pattern_split[4]
            # NOTE(review): 'ab' is Python-2-style; under Python 3
            # csv.writer needs a text-mode file — confirm interpreter.
            with open(output, 'ab') as output_file:
                writer = csv_writer(output_file, delimiter='\t')
                writer.writerow([transfection_label, cell_type, exp_time,
                                 image_number, cell_label,
                                 sum_qualifying_GFP, sum_total_GFP,
                                 average_3d_GFP, median_3d_GFP, std_3d_GFP,
                                 average_nonqualifying_3d_GFP,
                                 median_nonqualifying_3d_GFP,
                                 std_nonqualifying_3d_GFP,
                                 regression_results[0],
                                 regression_results[2],
                                 regression_results[3]])

            # per-cell verification figure
            plt.figure(figsize=(26.0, 15.0))
            plt.title('Kristen\'s Data')
            plt.suptitle(name_pattern)
            main_ax = plt.subplot(221)
            plt.subplot(221, sharex=main_ax, sharey=main_ax)
            plt.title('mCherry Binary')
            im = plt.imshow(extranuclear_mCherry_pad,
                            interpolation='nearest', cmap='hot')
            plt.colorbar(im)
            plt.subplot(222, sharex=main_ax, sharey=main_ax)
            plt.title('mCherry')
            plt.imshow(mCherry, interpolation='nearest')
            plt.contour(extranuclear_mCherry_pad, [0.5], colors='k')
            plt.subplot(223)
            dplt.better2D_desisty_plot(GFP_1d, mCherry_1d)
            plt.title('mCherry Intensity as a Function of GFP Voxel')
            plt.xlabel('GFP Voxel')
            plt.ylabel('mCherry Intensity')
            plt.subplot(224, sharex=main_ax, sharey=main_ax)
            plt.title('mCherry-cutoff applied')
            plt.imshow(mCherry_2, interpolation='nearest')
            if not save:
                plt.show()
            else:
                name_puck = directory_to_save_to + '/' + 'Kristen-' + \
                    name_pattern + '_cell' + str(cell_label) + '.png'
                plt.savefig(name_puck)
                plt.close()

    # summary figure: mCherry before/after the qualifying-cell cutoff
    plt.figure(figsize=(26.0, 15.0))
    main_ax = plt.subplot(121)
    plt.subplot(121, sharex=main_ax, sharey=main_ax)
    plt.suptitle('mCherry Before and After Qualifying Cell Cutoff is Applied')
    plt.title('mCherry')
    im = plt.imshow(mCherry, interpolation='nearest')
    plt.colorbar(im)
    plt.subplot(122, sharex=main_ax, sharey=main_ax)
    plt.title('mCherry')
    plt.imshow(mCherry_cutoff, interpolation='nearest')
    if not save:
        plt.show()
    else:
        name_puck = directory_to_save_to + '/' + 'Kristen-' + \
            name_pattern + 'cutoff_app' + '.png'
        plt.savefig(name_puck)
        plt.close()
    return qualifying_regression_stats
def write_results_csv_exploits_to_cve(
    self,
    results_to_write: dict,
    dest_dir: str,
    csv_file: str,
    hosts_results: dict,
    csv_dir=DefaultValues.CSV_RESULTS_DIRECTORY,
) -> None:
    """
    TL;DR: This function matches exploits to appropriate CVEs.

    In the more long way: this function firstly search for all products
    that connected with some particular CVE, for example, let it be
    CVE-2014-0160 and products like "OpenSSL, Apache, Nginx", any other,
    etc. Then, when all products are collected, we can match exploits
    to this CVEs and also to these products. On the finish, we will get
    results like:
    "CVE #1, List of products, Exploit #1, description"
    "CVE #1, List of products, Exploit #2, description"
    etc.

    :param results_to_write: CVE -> exploit collections with definitions
    :param dest_dir: destination dir to write results
    :param csv_file: file to save results
    :param hosts_results: results about all the scanned hosts
    :param csv_dir: directory to save CSVs
    :return: None
    """
    if not results_to_write:
        return

    # CVE id -> list of distinct products observed with that CVE
    vulnerabilities_mapping = {}
    for host, info in hosts_results.items():
        if not info.get("vulnerabilities"):
            continue
        for vulnerabilities_db, vulnerabilities_info in info.get(
                "vulnerabilities").items():
            if not vulnerabilities_info:
                continue
            list_of_vulns = vulnerabilities_info.keys()
            for vulnerability in list_of_vulns:
                if vulnerabilities_mapping.get(vulnerability):
                    if (info.get("product")
                            not in vulnerabilities_mapping[vulnerability]):
                        vulnerabilities_mapping[vulnerability].append(
                            info.get("product"))
                else:
                    vulnerabilities_mapping.update(
                        {vulnerability: [info.get("product")]})

    path_to_csv_file = Path(".").joinpath(dest_dir).joinpath(csv_dir)
    path_to_csv_file.mkdir(parents=True, exist_ok=True)
    path_to_csv_file = path_to_csv_file.joinpath(csv_file)

    with open(path_to_csv_file, mode="w", newline="") as result_csv_file:
        _writer = csv_writer(result_csv_file, delimiter=",",
                             quotechar='"', quoting=QUOTE_ALL)
        _writer.writerow([
            "CVE with exploit",
            "Affected Products",
            "Exploit title",
            "Bulletin family",
            "Exploit description",
            "id",
            "Exploit HREF",
            "type",
            "CVSS Score",
            "CVSS Vector",
            "Vulners HREF",
        ])
        for cve, exploits in results_to_write.items():
            for exploit in exploits:
                _writer.writerow([
                    cve,
                    # FIX: default to [] — a CVE with no product seen in
                    # hosts_results would make join(None) raise TypeError
                    ", ".join(vulnerabilities_mapping.get(cve, [])),
                    exploit.get("title"),
                    exploit.get("bulletinFamily"),
                    exploit.get("description"),
                    exploit.get("id"),
                    exploit.get("href"),
                    exploit.get("type"),
                    exploit.get("cvss", {}).get("score"),
                    exploit.get("cvss", {}).get("vector"),
                    exploit.get("vhref"),
                ])
def __init__(self, filepath, header):
    """
    Remember the csv path and header, and create the file with its
    header row (truncating any existing file).

    :param filepath: destination csv file path
    :param header: iterable of column names written as the first row
    """
    self.filepath = filepath
    self.header = header
    # FIX: newline='' lets the csv module control row endings; without
    # it every row is followed by a blank line on Windows.
    with open(filepath, 'w', newline='') as file:
        writer = csv_writer(file)
        writer.writerow(header)
def amass_csv(amass_in, csv_out, by_ip=False):
    """
    Convert Amass JSON-lines results into an Excel-compatible CSV.

    Param amass_in (str): the Amass data to convert to CSV
    Param csv_out (str): the path to the CSV output file
    Param by_ip (bool): emit one row per address instead of one
        collapsed row per name
    Returns (int): number of data rows written
    """
    # slurp the JSON-lines input, one record per line
    records = []
    with open(amass_in, 'r') as fh:
        for raw_line in fh:
            records.append(json_loads(raw_line))

    rows_written = 0
    with open(csv_out, 'w', newline='') as fh:
        sheet = csv_writer(fh, dialect='excel')
        sheet.writerow(['name', 'domain', 'ip', 'cidr', 'asn', 'desc',
                        'tag', 'source'])

        for record in records:
            host_name = record['name']
            host_domain = record['domain']

            # gather the per-address columns in parallel lists
            ips, cidrs, asns, descs = [], [], [], []
            for addr in record['addresses']:
                ips.append(addr['ip'])
                cidrs.append(addr['cidr'])
                asns.append(str(addr['asn']))
                descs.append(addr['desc'])

            tag = record['tag']

            # the old format did not use a [list] for source
            sources = []
            if 'sources' in record:
                sources = record['sources']
            elif 'source' in record:
                sources.append(record['source'])

            if by_ip:
                # one row per address; only the first source is kept
                for pos, one_ip in enumerate(ips):
                    sheet.writerow([host_name, host_domain, one_ip,
                                    cidrs[pos], asns[pos], descs[pos],
                                    tag, sources[0]])
                    rows_written += 1
            else:
                # one collapsed row; multi-valued cells joined with CRLF
                sheet.writerow([host_name, host_domain,
                                '\r\n'.join(ips), '\r\n'.join(cidrs),
                                '\r\n'.join(asns), '\r\n'.join(descs),
                                tag, '\r\n'.join(sources)])
                rows_written += 1
    return rows_written
def predict_folder(img_dir, model_dir, progress_hook=None, move=True,
                   csv=False):
    """
    Run your model on a directory of images. This will also go through any
    images in existing subdirectories.
    Move each image into a subdirectory structure based on the prediction --
    the predicted label becomes the directory name where the image goes.

    :param img_dir: the filepath to your directory of images.
    :param model_dir: path to the Lobe Tensorflow SavedModel export.
    :param progress_hook: an optional function that will be run with
        progress_hook(currentProgress, totalProgress) when progress updates.
    :param move: a flag for whether you want to physically move the image
        files into a subfolder structure based on the predicted label
    :param csv: a flag for whether you want to create an output csv showing
        the image filenames and their predictions
    """
    print(f"Predicting {img_dir}")
    img_dir = os.path.abspath(img_dir)
    if not os.path.isdir(img_dir):
        raise ValueError(f"Please specify a directory to images. Found {img_dir}")
    # count every file under img_dir (assumes all files are images)
    num_items = sum(len(files) for _, _, files in os.walk(img_dir))
    print(f"Predicting {num_items} items...")
    # load the model
    print("Loading model...")
    model = ImageModel.load(model_path=model_dir)
    print("Model loaded!")
    # create our output csv (header only; rows are appended per image)
    out_csv = os.path.join(img_dir, "predictions.csv")
    if csv:
        with open(out_csv, 'w', encoding="utf-8", newline='') as f:
            writer = csv_writer(f)
            writer.writerow(['File', 'Label', 'Confidence'])
    # iterate over the rows and predict the label
    curr_progress = 0
    no_labels = 0
    with tqdm(total=num_items) as pbar:
        with ThreadPoolExecutor() as executor:
            model_futures = []
            # make our prediction jobs
            for root, _, files in os.walk(img_dir):
                for filename in files:
                    image_file = os.path.abspath(os.path.join(root,
                                                              filename))
                    model_futures.append(
                        (executor.submit(predict_label_from_image_file,
                                         image_file=image_file,
                                         model=model), image_file))
            # collect results in submission order
            for future, img_file in model_futures:
                label, confidence = future.result()
                if label is None:
                    no_labels += 1
                else:
                    # move the file
                    dest_file = img_file
                    if move:
                        filename = os.path.split(img_file)[-1]
                        name, ext = os.path.splitext(filename)
                        dest_dir = os.path.join(img_dir, label)
                        os.makedirs(dest_dir, exist_ok=True)
                        dest_file = os.path.abspath(
                            os.path.join(dest_dir, filename))
                        # only move if the destination is different
                        # than the file
                        if dest_file != img_file:
                            try:
                                # rename the file if there is a conflict
                                rename_idx = 0
                                while os.path.exists(dest_file):
                                    new_name = f'{name}_{rename_idx}{ext}'
                                    dest_file = os.path.abspath(
                                        os.path.join(dest_dir, new_name))
                                    rename_idx += 1
                                shutil.move(img_file, dest_file)
                            except Exception as e:
                                print(f"Problem moving file: {e}")
                    # write the results to a csv
                    if csv:
                        with open(out_csv, 'a', encoding="utf-8",
                                  newline='') as f:
                            writer = csv_writer(f)
                            writer.writerow([dest_file, label, confidence])
                pbar.update(1)
                if progress_hook:
                    curr_progress += 1
                    progress_hook(curr_progress, num_items)
    print(f"Done! Number of images without predicted labels: {no_labels}")
def write_dataset(dataset, filename):
    """
    Write an iterable of rows to `filename` as comma-separated CSV.

    :param dataset: iterable of rows, each row an iterable of cell values
    :param filename: destination file path (overwritten if it exists)
    """
    # FIX: newline='' lets the csv module control row endings; without
    # it every row is followed by a blank line on Windows.
    with open(filename, mode='w', newline='') as file:
        dataset_writer = csv_writer(file, delimiter=',', quotechar='"',
                                    quoting=QUOTE_MINIMAL)
        for row in dataset:
            dataset_writer.writerow(row)
def create_dataset(filepath, url_col=None, label_col=None, progress_hook=None, destination_directory=None):
    """
    Given a file with urls to images, downloads those images to a new directory that has the same name as the
    file without the extension. If labels are present, further categorizes the directory to have the labels as
    sub-directories.

    :param filepath: path to a valid txt or csv file with image urls to download.
    :param url_col: if this is a csv, the column header name for the urls to download.
    :param label_col: if this is a csv, the column header name for the labels of the images.
    :param progress_hook: an optional function that will be run with progress_hook(currentProgress, totalProgress)
        when progress updates.
    :param destination_directory: an optional directory path to download the dataset to.
    """
    print(f"Processing {filepath}")
    filepath = os.path.abspath(filepath)
    filename, ext = _name_and_extension(filepath)
    # read the file
    # if this a .txt file, don't treat the first row as a header. Otherwise, use the first row for header column names.
    if ext != '.xlsx':
        csv = pd.read_csv(filepath, header=None if ext == '.txt' else 0)
    else:
        csv = pd.read_excel(filepath, header=0)
    if ext in ['.csv', '.xlsx'] and not url_col:
        raise ValueError(f"Please specify an image url column for the csv.")
    url_col_idx = 0
    if url_col:
        try:
            url_col_idx = list(csv.columns).index(url_col)
        except ValueError:
            raise ValueError(
                f"Image url column {url_col} not found in csv headers {csv.columns}"
            )
    label_col_idx = None
    if label_col:
        try:
            label_col_idx = list(csv.columns).index(label_col)
        except ValueError:
            raise ValueError(
                f"Label column {label_col} not found in csv headers {csv.columns}"
            )
    total_jobs = len(csv)
    print(f"Downloading {total_jobs} items...")
    errors = []
    dest = os.path.join(destination_directory, filename) if destination_directory else filename
    # NOTE(review): this try/except re-raises unchanged; kept for parity with the
    # original (the comment there suggested keyboard-interrupt handling).
    try:
        # iterate over the rows and add to our download processing job!
        with tqdm(total=total_jobs) as pbar:
            with ThreadPoolExecutor() as executor:
                # for every image in the row, download it!
                download_futures = {}
                lock = Lock()
                for i, row in enumerate(csv.itertuples(index=False)):
                    # job is passed to our worker processes
                    index = i + 1
                    url = row[url_col_idx]
                    label = None
                    # BUGFIX: must compare against None -- a label column at
                    # index 0 is falsy and was silently skipped before.
                    if label_col_idx is not None:
                        label = row[label_col_idx]
                        label = None if pd.isnull(label) else label
                    download_futures[executor.submit(download_image,
                                                     url=url,
                                                     directory=dest,
                                                     lock=lock,
                                                     label=label)] = (index, url, label)
                # iterate over the results to update our progress bar and write any errors to the error csv
                num_processed = 0
                for future in as_completed(download_futures):
                    index, url, label = download_futures[future]
                    filename = future.result()
                    if not filename:
                        error_row = [index, url]
                        if label_col_idx is not None:
                            error_row.append(label)
                        errors.append(error_row)
                    # update progress
                    pbar.update(1)
                    num_processed += 1
                    if progress_hook:
                        progress_hook(num_processed, total_jobs)
        print('Cleaning up...')
        # write out the error csv
        if len(errors) > 0:
            errors.sort()
            fname, ext = os.path.splitext(filepath)
            error_file = f"{fname}_errors.csv"
            with open(error_file, 'w', newline='') as f:
                header = f"index,url{',label' if label_col_idx is not None else ''}\n"
                f.write(header)
                writer = csv_writer(f)
                writer.writerows(errors)
    except Exception:
        raise
def main(argv):
    """Compare per-file probability distributions from two resume directories.

    Computes sums of squared error between paired distributions, against a
    uniform distribution, and against the single output distribution, then
    writes the aggregate rows to output-sem-mse-analysis.csv.

    :param argv: command-line arguments (two resume directory paths).
    :returns: process exit code 0.
    """
    ap = ArgumentParser(prog="sem-mse")
    ap.add_argument("resume_a")
    ap.add_argument("resume_b")
    args = ap.parse_args(argv)
    output_distributions = list(pickler.load(os.path.join(args.resume_a, OUTPUT)))
    assert len(output_distributions) == 1
    output_distribution = output_distributions[0]
    distributions_basenames = [
        os.path.basename(p)
        for p in glob.glob(os.path.join(args.resume_a, DISTRIBUTIONS_GLOB))
    ]
    size = None
    uniform_distribution = None
    count = 0
    comparison_total = 0.0
    uniform_total_a = 0.0
    uniform_total_b = 0.0
    distribution_total_a = 0.0
    distribution_total_b = 0.0
    for distributions_basename in sorted(distributions_basenames, key=file_sort_key):
        stream_a = pickler.load(
            os.path.join(args.resume_a, distributions_basename))
        stream_b = pickler.load(
            os.path.join(args.resume_b, distributions_basename))
        for distribution_a, distribution_b in zip(stream_a, stream_b):
            assert len(distribution_a) == len(distribution_b)
            if size is None:
                # Build the uniform reference once, keyed like the real
                # distributions so sum_squared_error can align entries.
                size = len(distribution_a)
                value = 1.0 / size
                # BUGFIX: the original set comprehension
                # ({value for key in ...}) collapsed to a single-element set
                # instead of the intended key->probability mapping.
                uniform_distribution = {key: value for key in distribution_a}
            comparison_total += sum_squared_error(distribution_a, distribution_b)
            uniform_total_a += sum_squared_error(distribution_a, uniform_distribution)
            uniform_total_b += sum_squared_error(distribution_b, uniform_distribution)
            distribution_total_a += sum_squared_error(distribution_a, output_distribution)
            distribution_total_b += sum_squared_error(distribution_b, output_distribution)
            count += 1
        # Both streams must be fully consumed, otherwise the zip silently
        # truncated one of them and the comparison is invalid.
        try:
            next(stream_a)
            raise ValueError("stream a wasn't exhausted!")
        except StopIteration:
            pass
        try:
            next(stream_b)
            raise ValueError("stream b wasn't exhausted!")
        except StopIteration:
            pass
    with open("output-sem-mse-analysis.csv", "w", newline='') as fh:
        writer = csv_writer(fh)
        writer.writerow([
            "comparison", "sum of squared error", "mean squared error",
            "mse normalized"
        ])
        writer.writerow(row_data("comparison", comparison_total, count, size))
        writer.writerow(row_data("uniform a", uniform_total_a, count, size))
        writer.writerow(row_data("uniform b", uniform_total_b, count, size))
        writer.writerow(
            row_data("distribution a", distribution_total_a, count, size))
        writer.writerow(
            row_data("distribution b", distribution_total_b, count, size))
    return 0
def _create_csv_writer(cls, buffer):
    """Return a csv writer over *buffer* configured with the class delimiters."""
    options = {
        "delimiter": cls.field_delimiter,
        "lineterminator": cls.row_delimiter,
    }
    return csv_writer(buffer, **options)
def save_games(games: List[CSVGame], csv_filename: str = GAMES_CSV) -> None:
    """Persist *games* to *csv_filename*, writing the field names as a header row."""
    with open(csv_filename, 'w', newline='') as csv_file:
        out = csv_writer(csv_file)
        out.writerow(CSVGame._fields)  # Write headers.
        for game in games:
            out.writerow(game)
def download_hiwat(request):
    """Serve HIWAT forecast streamflow for one reach as a CSV attachment.

    Reads `comid` (reach id) and `startdate` (index into the reverse-sorted
    Qout_hiwat file list) from the request's GET parameters, extracts the
    time series for that reach from the netCDF file, and returns it as
    datetime/streamflow rows. On any failure a JSON error is returned.
    """
    print(request)
    get_data = request.GET
    try:
        comid = get_data['comid']
        startdate = get_data['startdate']
        country = 'Nepal'
        model = 'Hiwat'
        path = os.path.join(app.get_app_workspace().path, 'forecast')
        filename = [f for f in os.listdir(path) if 'Qout_hiwat' in f]
        filename.reverse()
        selectedDate = int(startdate)
        filename = filename[selectedDate]
        file = path + '/' + filename
        # BUGFIX: the dataset was never closed before; close it as soon as the
        # arrays have been read out, even if extraction raises.
        res = nc.Dataset(file, 'r')
        try:
            dates_raw = res.variables['time'][:]
            dates = [dt.datetime.fromtimestamp(d) for d in dates_raw]
            comid_list = res.variables['rivid'][:]
            comid_index = int(np.where(comid_list == int(comid))[0])
            values = [float(l[comid_index]) for l in list(res.variables['Qout'][:])]
        finally:
            res.close()
        pairs = [list(a) for a in zip(dates, values)]
        response = HttpResponse(content_type='text/csv')
        response[
            'Content-Disposition'] = 'attachment; filename={0}-{1}-{2}.csv'.format(
                country, model, comid)
        writer = csv_writer(response)
        writer.writerow(['datetime', 'streamflow (m3/s)'])
        for row_data in pairs:
            writer.writerow(row_data)
        return response
    except Exception as e:
        # Boundary handler: report the failure to the client as JSON.
        print(e)
        return JsonResponse(
            {'error': 'No HIWAT data found for the selected reach.'})
def predict_dataset(filepath, model_dir, url_col=None, progress_hook=None):
    """
    Given a file with urls to images, predict the given SavedModel on the image and write the label
    and confidene back to the file.

    :param filepath: path to a valid txt or csv file with image urls to download.
    :param model_dir: path to the Lobe Tensorflow SavedModel export.
    :param url_col: if this is a csv, the column header name for the urls to download.
    :param progress_hook: an optional function that will be run with progress_hook(currentProgress, totalProgress)
        when progress updates.
    """
    print(f"Predicting {filepath}")
    filepath = os.path.abspath(filepath)
    filename, ext = _name_and_extension(filepath)
    # read the file
    # if this a .txt file, don't treat the first row as a header. Otherwise, use the first row for header column names.
    if ext != '.xlsx':
        csv = pd.read_csv(filepath, header=None if ext == '.txt' else 0)
    else:
        csv = pd.read_excel(filepath, header=0)
    if ext in ['.csv', '.xlsx'] and not url_col:
        raise ValueError(f"Please specify an image url column for the csv.")
    url_col_idx = 0
    if url_col:
        try:
            url_col_idx = list(csv.columns).index(url_col)
        except ValueError:
            raise ValueError(
                f"Image url column {url_col} not found in csv headers {csv.columns}"
            )
    num_items = len(csv)
    print(f"Predicting {num_items} items...")
    # load the model
    print("Loading model...")
    model = ImageModel.load(model_path=model_dir)
    print("Model loaded!")
    # create our output csv
    fname, ext = os.path.splitext(filepath)
    out_file = f"{fname}_predictions.csv"
    # PERF: keep one handle open for the whole run instead of re-opening the
    # file in append mode for every row; flush per row so partial results
    # survive an interruption just like before.
    with open(out_file, 'w', encoding="utf-8", newline='') as f:
        writer = csv_writer(f)
        # our header names from the pandas columns
        writer.writerow([
            *[str(col) if not pd.isna(col) else '' for col in csv.columns],
            'label', 'confidence'
        ])
        # iterate over the rows and predict the label
        with tqdm(total=num_items) as pbar:
            with ThreadPoolExecutor() as executor:
                # make our prediction jobs
                model_futures = [
                    executor.submit(predict_image_url,
                                    url=row[url_col_idx],
                                    model=model,
                                    row=row)
                    for row in csv.itertuples(index=False)
                ]
                # write the results from the predict (this should go in order of the futures)
                for i, future in enumerate(model_futures):
                    label, confidence, row = future.result()
                    writer.writerow([
                        *[str(col) if not pd.isna(col) else '' for col in row],
                        label, confidence
                    ])
                    f.flush()
                    pbar.update(1)
                    if progress_hook:
                        progress_hook(i + 1, num_items)
# NOTE(review): Python 2 script fragment -- `genes_to_ids_dict`,
# `high_conf_translation_dict`, `low_conf_translation_dict` and the
# *_location variables are presumably defined earlier in the file; confirm.
# Optionally build a gene-name -> gene-id mapping from a tab-separated file
# (column 2 is the name, column 0 is the id).
if gene_to_id_file_location:
    with open(gene_to_id_file_location, 'r') as source:
        reader = csv_reader(source, delimiter='\t')
        # Consume (and echo) the header row before reading data lines.
        print reader.next()
        for line in reader:
            genes_to_ids_dict[line[2]] = line[0]

# Translate each word (first column) from the data source, optionally mapping
# gene names to ids first, and bucket results by translation confidence.
with open(data_source_location, 'r') as source:
    reader = csv_reader(source)
    for i, line in enumerate(reader):
        word = line[0]
        if gene_to_id_file_location:
            word = genes_to_ids_dict.get(word, 'None found')
        if word in high_conf_translation_dict.keys():
            high_conf_trans.append(high_conf_translation_dict[word])
        if word in low_conf_translation_dict.keys():
            low_conf_trans.append(low_conf_translation_dict[word])

# NOTE(review): `i` here is the index of the last row (rows - 1), so the
# reported total is off by one relative to the actual row count.
print "out of %s, %s were translated with high confidence, %s with low and %s were not found" % \
    (i, len(high_conf_trans), len(low_conf_trans), i-len(high_conf_trans)-len(low_conf_trans))

# Dump only the high-confidence translations.
with open(data_dump_location, 'w') as destination:
    writer = csv_writer(destination)
    writer.writerows((word for word in high_conf_trans))
def main(path_pairs, path_definitions, path_dense_fevents, path_info, output_path, timings_path):
    """Run Cox hazard regressions for every (prior, outcome) endpoint pair and lag.

    Jobs are drained from a LIFO queue; on a convergence failure a job is
    re-queued once with a lower step size, and on NotEnoughIndividuals the
    whole endpoint pair is skipped. Results go to *output_path*, per-job
    timings to *timings_path* (both opened in exclusive-create mode).
    """
    # Initialize the CSV output
    line_buffering = 1
    # "x" mode refuses to overwrite an existing results file.
    res_file = open(output_path, "x", buffering=line_buffering)
    res_writer = init_csv(res_file)
    # File that keep tracks of how much time was spent on each endpoint
    timings_file = open(timings_path, "x", buffering=line_buffering)
    timings_writer = csv_writer(timings_file)
    timings_writer.writerow(
        ["prior", "outcome", "lag", "step_size", "time_seconds"])
    # Load all data
    pairs, endpoints, df_events, df_info = load_data(path_pairs, path_definitions, path_dense_fevents, path_info)
    # Initialize the job queue: one job per (pair, lag) combination,
    # all starting at the default step size.
    jobs = LifoQueue()
    for pair in pairs:
        for lag in LAGS:
            jobs.put({
                "pair": pair,
                "lag": lag,
                "step_size": DEFAULT_STEP_SIZE
            })
    # Keep track if the current endpoint pair needs to be skipped
    skip = None
    # Run the regression for each job
    while not jobs.empty():
        time_start = now()
        # Get job info
        job = jobs.get()
        pair = job["pair"]
        lag = job["lag"]
        step_size = job["step_size"]
        # Go to next endpoint pair if this one is to be skipped
        if pair == skip:
            continue
        logger.info(f"Jobs remaining: ~ {jobs.qsize()}")
        logger.info(
            f"[JOB] pair: {pair} | lag: {lag} | step size: {step_size}")
        prior, outcome = pair
        # An endpoint with a non-null SEX definition is sex-specific.
        is_sex_specific = pd.notna(endpoints.loc[endpoints.NAME == outcome,
                                                 "SEX"].iloc[0])
        # Restart the clock so setup above isn't counted in the job timing.
        time_start = now()
        try:
            (df_unexp, df_unexp_death, df_unexp_exp_p1, df_unexp_exp_p2,
             df_tri_p1, df_tri_p2) = prep_coxhr(pair, lag, df_events, df_info)
            nindivs, df_lifelines = prep_lifelines(df_unexp, df_unexp_death,
                                                   df_unexp_exp_p1, df_unexp_exp_p2,
                                                   df_tri_p1, df_tri_p2)
            compute_coxhr(pair, df_lifelines, lag, step_size, is_sex_specific,
                          nindivs, res_writer)
        except NotEnoughIndividuals as exc:
            skip = pair  # skip remaining jobs (different lags) for this endpoint pair
            logger.warning(exc)
        except (ConvergenceError, Warning) as exc:
            # Retry with a lower step_size
            if step_size == DEFAULT_STEP_SIZE:
                step_size = LOWER_STEP_SIZE
                jobs.put({"pair": pair, "lag": lag,
                          "step_size": step_size})
            # We already tried with the lower step size, we have to skip this job
            else:
                logger.warning(
                    f"Failed to run Cox.fit() for {pair}, lag: {lag}, step size: {step_size}:\n{exc}"
                )
        finally:
            # Record wall-clock time for every job, including failed ones.
            job_time = now() - time_start
            timings_writer.writerow([prior, outcome, lag, step_size, job_time])
    timings_file.close()
    res_file.close()