Example #1
def read_daily_report_data_csv_JHU(file):
    """
    Extract data from a single daily report file from JHU.

    Expected columns (old JHU format):
        Province/State, Country/Region, Last Update, Confirmed, Deaths,
        Recovered, and optionally Latitude, Longitude.

    :param file: Path to the CSV file.
    :return: Tuple of (countries, provinces, updates, province_in_country,
        province_rep_update) NodeSets and RelationshipSets.
    """
    log.info('Read JHU CSV file {}'.format(file))

    countries = NodeSet(['Country'], ['name'])
    provinces = NodeSet(['Province'], ['name'])
    updates = NodeSet(['DailyReport'], ['uuid'])
    province_in_country = RelationshipSet('PART_OF', ['Province'], ['Country'], ['name'], ['name'])
    province_in_country.unique = True
    province_rep_update = RelationshipSet('REPORTED', ['Province'], ['DailyReport'], ['name'], ['uuid'])

    with open(file, 'rt') as csvfile:
        rows = csv.reader(csvfile, delimiter=',', quotechar='"')
        # skip header
        next(rows)

        for row in rows:
            country = row[1]
            province = row[0]
            # if no name for province, use country name
            if not province:
                province = '{}_complete'.format(country)

            date = parse(row[2])
            # NOTE(review): plain concatenation without a separator could in
            # theory collide for different (country, province) pairs.
            uuid = country + province + str(date)
            # Empty count cells are stored as the string sentinel 'na'.
            confirmed = int(row[3]) if row[3] else 'na'
            death = int(row[4]) if row[4] else 'na'
            recovered = int(row[5]) if row[5] else 'na'

            # Coordinates are optional trailing columns in older files.
            # 'lng' avoids shadowing the historical builtin name 'long'.
            lat = row[6] if len(row) >= 7 else None
            lng = row[7] if len(row) >= 8 else None

            province_dict = {'name': province}
            if lat and lng:
                province_dict['latitude'] = lat
                province_dict['longitude'] = lng
            provinces.add_unique(province_dict)

            countries.add_unique({'name': country})

            updates.add_unique(
                {'date': date, 'confirmed': confirmed, 'death': death, 'recovered': recovered, 'uuid': uuid})

            province_in_country.add_relationship({'name': province}, {'name': country}, {'source': 'jhu'})
            province_rep_update.add_relationship({'name': province}, {'uuid': uuid}, {'source': 'jhu'})

    return countries, provinces, updates, province_in_country, province_rep_update
Example #2
class GtexMetadataParser(ReturnParser):
    """
    Parse the GTEx sample attribute file into GtexSample, GtexTissue and
    GtexDetailedTissue nodes plus MEASURES/PARENT relationships.
    """

    # Sample attribute columns copied verbatim from the annotation file onto
    # each GtexSample node; order matches the original property order.
    SAMPLE_PROPERTY_COLUMNS = (
        'SMATSSCR', 'SMCENTER', 'SMPTHNTS', 'SMRIN', 'SMTS', 'SMTSD',
        'SMUBRID', 'SMTSISCH', 'SMTSPAX', 'SMNABTCH', 'SMNABTCHT',
        'SMNABTCHD', 'SMGEBTCH', 'SMGEBTCHD', 'SMGEBTCHT', 'SMAFRZE',
        'SMGTC', 'SME2MPRT', 'SMCHMPRS', 'SMNTRART', 'SMNUMGPS', 'SMMAPRT',
        'SMEXNCRT', 'SM550NRM', 'SMGNSDTC', 'SMUNMPRT', 'SM350NRM',
        'SMRDLGTH', 'SMMNCPB', 'SME1MMRT', 'SMSFLGTH', 'SMESTLBS', 'SMMPPD',
        'SMNTERRT', 'SMRRNANM', 'SMRDTTL', 'SMVQCFL', 'SMMNCV', 'SMTRSCPT',
        'SMMPPDPR', 'SMCGLGTH', 'SMGAPPCT', 'SMUNPDRD', 'SMNTRNRT',
        'SMMPUNRT', 'SMEXPEFF', 'SMMPPDUN', 'SME2MMRT', 'SME2ANTI',
        'SMALTALG', 'SME2SNSE', 'SMMFLGTH', 'SME1ANTI', 'SMSPLTRD',
        'SMBSMMRT', 'SME1SNSE', 'SME1PCTS', 'SMRRNART', 'SME1MPRT',
        'SMNUM5CD', 'SMDPMPRT', 'SME2PCTS',
    )

    def __init__(self):
        """Set up the NodeSets and RelationshipSets produced by this parser."""
        super(GtexMetadataParser, self).__init__()

        # NodeSets
        self.tissues = NodeSet(['GtexTissue'], merge_keys=['name'])
        self.detailed_tissues = NodeSet(['GtexDetailedTissue'],
                                        merge_keys=['name'])
        self.sample = NodeSet(['GtexSample'], merge_keys=['sid'])

        # RelationshipSets
        self.sample_measures_tissue = RelationshipSet('MEASURES',
                                                      ['GtexSample'],
                                                      ['GtexTissue'], ['sid'],
                                                      ['name'])
        self.sample_measures_detailed_tissue = RelationshipSet(
            'MEASURES', ['GtexSample'], ['GtexDetailedTissue'], ['sid'],
            ['name'])
        self.tissue_parent_detailed_tissue = RelationshipSet(
            'PARENT', ['GtexTissue'], ['GtexDetailedTissue'], ['name'],
            ['name'])
        self.tissue_parent_detailed_tissue.unique = True

    def run_with_mounted_arguments(self):
        """Entry point used with mounted arguments; delegates to run()."""
        self.run()

    def run(self):
        """Read the GTEx sample attribute file and populate the sets."""
        gtex_instance = self.get_instance_by_name('Gtex')

        gtext_sample_attribute_file = gtex_instance.get_file(
            'GTEx_Analysis_v8_Annotations_SampleAttributesDS.txt')

        gtex_df = pandas.read_csv(gtext_sample_attribute_file,
                                  sep='\t',
                                  header=0,
                                  index_col=False,
                                  encoding="utf-8-sig")

        for row in gtex_df.itertuples():
            sid = row.SAMPID
            tissue_name = row.SMTS
            detailed_tissue_name = row.SMTSD

            # Copy the sample id plus every annotated attribute column.
            props = {'sid': sid}
            for column in self.SAMPLE_PROPERTY_COLUMNS:
                props[column] = getattr(row, column)

            self.sample.add_node(props)
            self.tissues.add_unique({'name': tissue_name})
            self.detailed_tissues.add_unique({'name': detailed_tissue_name})

            self.sample_measures_tissue.add_relationship({'sid': sid},
                                                         {'name': tissue_name},
                                                         {})
            self.sample_measures_detailed_tissue.add_relationship(
                {'sid': sid}, {'name': detailed_tissue_name}, {})

            self.tissue_parent_detailed_tissue.add_relationship(
                {'name': tissue_name}, {'name': detailed_tissue_name}, {})
Example #3
def read_daily_report_data_csv_JHU(file):
    """
    Extract data from a single daily report file from JHU.

    Old format (until 03-21-2020)
        Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude
    New format:
        FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key

    :param file: Path to the CSV file.
    :return: Tuple of (countries, provinces, updates, province_in_country,
        province_rep_update) NodeSets and RelationshipSets.
    """
    log.info('Read JHU CSV file {}'.format(file))

    countries = NodeSet(['Country'], ['name'])
    provinces = NodeSet(['Province'], ['name'])
    updates = NodeSet(['DailyReport'], ['uuid'])
    province_in_country = RelationshipSet('PART_OF', ['Province'], ['Country'],
                                          ['name'], ['name'])
    province_in_country.unique = True
    province_rep_update = RelationshipSet('REPORTED', ['Province'],
                                          ['DailyReport'], ['name'], ['uuid'])

    with open(file, 'rt') as csvfile:
        rows = csv.reader(csvfile, delimiter=',', quotechar='"')
        # The header width distinguishes the old 6-8 column format from the
        # new 12 column format.
        header = next(rows)
        if len(header) > 8:
            file_type = 'new'
        else:
            file_type = 'old'
        log.info("File type: {}".format(file_type))

        for row in rows:

            # 'lng' avoids shadowing the historical builtin name 'long'.
            if file_type == 'old':
                country, province, date, confirmed, death, recovered, lat, lng = parse_jhu_old_file_row(
                    row)
            else:
                country, province, date, confirmed, death, recovered, lat, lng = parse_jhu_new_file_row(
                    row)

            province_dict = {'name': province}
            if lat and lng:
                province_dict['latitude'] = lat
                province_dict['longitude'] = lng

            # NOTE(review): plain concatenation without a separator could in
            # theory collide for different (country, province) pairs.
            uuid = country + province + str(date)

            provinces.add_unique(province_dict)

            countries.add_unique({'name': country})

            updates.add_unique({
                'date': date,
                'confirmed': confirmed,
                'death': death,
                'recovered': recovered,
                'uuid': uuid
            })

            province_in_country.add_relationship({'name': province},
                                                 {'name': country},
                                                 {'source': 'jhu'})
            province_rep_update.add_relationship({'name': province},
                                                 {'uuid': uuid},
                                                 {'source': 'jhu'})

    return countries, provinces, updates, province_in_country, province_rep_update