import csv
import logging

from dateutil.parser import parse
from graphio import NodeSet, RelationshipSet

log = logging.getLogger(__name__)


def read_daily_report_data_csv_JHU(file):
    """
    Extract data from a single daily report file from JHU.

    :param file: Path to the CSV file
    :return: NodeSets and RelationshipSets for countries, provinces and daily reports.
    """
    log.info('Read JHU CSV file {}'.format(file))

    countries = NodeSet(['Country'], ['name'])
    provinces = NodeSet(['Province'], ['name'])
    updates = NodeSet(['DailyReport'], ['uuid'])
    province_in_country = RelationshipSet('PART_OF', ['Province'], ['Country'], ['name'], ['name'])
    province_in_country.unique = True
    province_rep_update = RelationshipSet('REPORTED', ['Province'], ['DailyReport'], ['name'], ['uuid'])

    with open(file, 'rt') as csvfile:
        rows = csv.reader(csvfile, delimiter=',', quotechar='"')
        # skip header
        next(rows)

        for row in rows:
            country = row[1]
            province = row[0]
            # if the province has no name, fall back to a country-level pseudo province
            if not province:
                province = '{}_complete'.format(country)

            date = parse(row[2])
            uuid = country + province + str(date)

            confirmed = int(row[3]) if row[3] else 'na'
            death = int(row[4]) if row[4] else 'na'
            recovered = int(row[5]) if row[5] else 'na'

            # coordinate columns are not present in every file
            lat = row[6] if len(row) >= 7 else None
            long = row[7] if len(row) >= 8 else None

            province_dict = {'name': province}
            if lat and long:
                province_dict['latitude'] = lat
                province_dict['longitude'] = long

            provinces.add_unique(province_dict)
            countries.add_unique({'name': country})
            updates.add_unique(
                {'date': date, 'confirmed': confirmed, 'death': death, 'recovered': recovered, 'uuid': uuid})

            province_in_country.add_relationship({'name': province}, {'name': country}, {'source': 'jhu'})
            province_rep_update.add_relationship({'name': province}, {'uuid': uuid}, {'source': 'jhu'})

    return countries, provinces, updates, province_in_country, province_rep_update
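# A minimal usage sketch (not from the source): parse one daily report file and
# merge the resulting sets into Neo4j. Assumptions: graphio NodeSet/RelationshipSet
# expose merge(graph) against a py2neo Graph, and the file path and credentials
# below are hypothetical placeholders.
from py2neo import Graph


def load_daily_report_example():
    graph = Graph('bolt://localhost:7687', auth=('neo4j', 'password'))  # hypothetical credentials
    countries, provinces, updates, province_in_country, province_rep_update = \
        read_daily_report_data_csv_JHU('daily_reports/03-15-2020.csv')  # hypothetical path
    # merge the node sets first so the relationship merges can match their endpoints
    for nodeset in (countries, provinces, updates):
        nodeset.merge(graph)
    for relset in (province_in_country, province_rep_update):
        relset.merge(graph)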
import pandas

# ReturnParser is the project's parser base class; NodeSet/RelationshipSet come from graphio (imported above).

# annotation columns of the GTEx sample attribute file that are copied onto the sample node
GTEX_SAMPLE_COLUMNS = (
    'SMATSSCR', 'SMCENTER', 'SMPTHNTS', 'SMRIN', 'SMTS', 'SMTSD', 'SMUBRID', 'SMTSISCH',
    'SMTSPAX', 'SMNABTCH', 'SMNABTCHT', 'SMNABTCHD', 'SMGEBTCH', 'SMGEBTCHD', 'SMGEBTCHT', 'SMAFRZE',
    'SMGTC', 'SME2MPRT', 'SMCHMPRS', 'SMNTRART', 'SMNUMGPS', 'SMMAPRT', 'SMEXNCRT', 'SM550NRM',
    'SMGNSDTC', 'SMUNMPRT', 'SM350NRM', 'SMRDLGTH', 'SMMNCPB', 'SME1MMRT', 'SMSFLGTH', 'SMESTLBS',
    'SMMPPD', 'SMNTERRT', 'SMRRNANM', 'SMRDTTL', 'SMVQCFL', 'SMMNCV', 'SMTRSCPT', 'SMMPPDPR',
    'SMCGLGTH', 'SMGAPPCT', 'SMUNPDRD', 'SMNTRNRT', 'SMMPUNRT', 'SMEXPEFF', 'SMMPPDUN', 'SME2MMRT',
    'SME2ANTI', 'SMALTALG', 'SME2SNSE', 'SMMFLGTH', 'SME1ANTI', 'SMSPLTRD', 'SMBSMMRT', 'SME1SNSE',
    'SME1PCTS', 'SMRRNART', 'SME1MPRT', 'SMNUM5CD', 'SMDPMPRT', 'SME2PCTS'
)


class GtexMetadataParser(ReturnParser):

    def __init__(self):
        """
        Parse GTEx sample attribute metadata into tissue, detailed tissue and
        sample NodeSets plus the RelationshipSets that connect them.
        """
        super(GtexMetadataParser, self).__init__()

        # NodeSets
        self.tissues = NodeSet(['GtexTissue'], merge_keys=['name'])
        self.detailed_tissues = NodeSet(['GtexDetailedTissue'], merge_keys=['name'])
        self.sample = NodeSet(['GtexSample'], merge_keys=['sid'])

        self.sample_measures_tissue = RelationshipSet('MEASURES', ['GtexSample'], ['GtexTissue'],
                                                      ['sid'], ['name'])
        self.sample_measures_detailed_tissue = RelationshipSet(
            'MEASURES', ['GtexSample'], ['GtexDetailedTissue'], ['sid'], ['name'])
        self.tissue_parent_detailed_tissue = RelationshipSet(
            'PARENT', ['GtexTissue'], ['GtexDetailedTissue'], ['name'], ['name'])
        self.tissue_parent_detailed_tissue.unique = True

    def run_with_mounted_arguments(self):
        self.run()

    def run(self):
        gtex_instance = self.get_instance_by_name('Gtex')
        gtex_sample_attribute_file = gtex_instance.get_file(
            'GTEx_Analysis_v8_Annotations_SampleAttributesDS.txt')

        gtex_df = pandas.read_csv(gtex_sample_attribute_file, sep='\t', header=0,
                                  index_col=False, encoding='utf-8-sig')

        for row in gtex_df.itertuples():
            sid = row.SAMPID
            tissue_name = row.SMTS
            detailed_tissue_name = row.SMTSD

            # copy all annotation columns onto the sample node
            props = {'sid': sid}
            for column in GTEX_SAMPLE_COLUMNS:
                props[column] = getattr(row, column)

            self.sample.add_node(props)
            self.tissues.add_unique({'name': tissue_name})
            self.detailed_tissues.add_unique({'name': detailed_tissue_name})

            self.sample_measures_tissue.add_relationship({'sid': sid}, {'name': tissue_name}, {})
            self.sample_measures_detailed_tissue.add_relationship(
                {'sid': sid}, {'name': detailed_tissue_name}, {})
            self.tissue_parent_detailed_tissue.add_relationship(
                {'name': tissue_name}, {'name': detailed_tissue_name}, {})
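# A usage sketch under stated assumptions: the surrounding parser framework
# registers a 'Gtex' DataSourceInstance so get_instance_by_name() can resolve
# it, and graphio NodeSets keep their collected dicts in a .nodes list.
# Instance registration itself is project specific and omitted here.
def run_gtex_parser_example():
    parser = GtexMetadataParser()
    parser.run_with_mounted_arguments()
    log.info('parsed {} samples across {} tissues'.format(
        len(parser.sample.nodes), len(parser.tissues.nodes)))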
def read_daily_report_data_csv_JHU(file):
    """
    Extract data from a single daily report file from JHU.

    Old format (until 03-21-2020):
        Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude
    New format:
        FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key

    :param file: Path to the CSV file
    :return: NodeSets and RelationshipSets for countries, provinces and daily reports.
    """
    log.info('Read JHU CSV file {}'.format(file))

    countries = NodeSet(['Country'], ['name'])
    provinces = NodeSet(['Province'], ['name'])
    updates = NodeSet(['DailyReport'], ['uuid'])
    province_in_country = RelationshipSet('PART_OF', ['Province'], ['Country'], ['name'], ['name'])
    province_in_country.unique = True
    province_rep_update = RelationshipSet('REPORTED', ['Province'], ['DailyReport'], ['name'], ['uuid'])

    with open(file, 'rt') as csvfile:
        rows = csv.reader(csvfile, delimiter=',', quotechar='"')

        # use the header length to distinguish the two formats:
        # the old format has 8 columns, the new one 12
        header = next(rows)
        if len(header) > 8:
            file_type = 'new'
        else:
            file_type = 'old'
        log.info('File type: {}'.format(file_type))

        for row in rows:
            if file_type == 'old':
                country, province, date, confirmed, death, recovered, lat, long = parse_jhu_old_file_row(row)
            elif file_type == 'new':
                country, province, date, confirmed, death, recovered, lat, long = parse_jhu_new_file_row(row)

            province_dict = {'name': province}
            if lat and long:
                province_dict['latitude'] = lat
                province_dict['longitude'] = long

            uuid = country + province + str(date)

            provinces.add_unique(province_dict)
            countries.add_unique({'name': country})
            updates.add_unique(
                {'date': date, 'confirmed': confirmed, 'death': death, 'recovered': recovered, 'uuid': uuid})

            province_in_country.add_relationship({'name': province}, {'name': country}, {'source': 'jhu'})
            province_rep_update.add_relationship({'name': province}, {'uuid': uuid}, {'source': 'jhu'})

    return countries, provinces, updates, province_in_country, province_rep_update
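# The two row parsers are not shown here; below is a minimal sketch reconstructed
# from the column layouts in the docstring and the inline logic of the old-format
# reader above. The function names and return order match the call sites; treat
# the bodies as an assumption, not the project's exact implementation.

def parse_jhu_old_file_row(row):
    # Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude
    country = row[1]
    province = row[0] or '{}_complete'.format(country)
    date = parse(row[2])
    confirmed = int(row[3]) if row[3] else 'na'
    death = int(row[4]) if row[4] else 'na'
    recovered = int(row[5]) if row[5] else 'na'
    lat = row[6] if len(row) >= 7 else None
    long = row[7] if len(row) >= 8 else None
    return country, province, date, confirmed, death, recovered, lat, long


def parse_jhu_new_file_row(row):
    # FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key
    country = row[3]
    province = row[2] or '{}_complete'.format(country)
    date = parse(row[4])
    confirmed = int(row[7]) if row[7] else 'na'
    death = int(row[8]) if row[8] else 'na'
    recovered = int(row[9]) if row[9] else 'na'
    lat = row[5] if row[5] else None
    long = row[6] if row[6] else None
    return country, province, date, confirmed, death, recovered, lat, long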