예제 #1
0
    def __init__(self):
        """Set up the node and relationship containers for this parser."""
        super(SomeParser, self).__init__()

        # nodes merged on their respective ID properties
        self.source = NodeSet(['Source'], merge_keys=['source_id'])
        self.target = NodeSet(['Target'], merge_keys=['target_id'])
        # (Source)-[:FOO]->(Target), matched via source_id / target_id
        self.rels = RelationshipSet(
            'FOO', ['Source'], ['Target'], ['source_id'], ['target_id'])
예제 #2
0
    def __init__(self):
        """
        Define the ENSEMBL entity NodeSets (genes, transcripts, proteins)
        and the CODES RelationshipSets that link them.
        """
        super(EnsemblEntityParser, self).__init__()

        # arguments
        self.arguments = ['taxid']

        # NodeSets: all merged on 'sid' and tagged with their data source
        self.genes = NodeSet(['Gene'],
                             merge_keys=['sid'],
                             default_props={'source': 'ensembl'})
        self.transcripts = NodeSet(['Transcript'],
                                   merge_keys=['sid'],
                                   default_props={'source': 'ensembl'})
        self.proteins = NodeSet(['Protein'],
                                merge_keys=['sid'],
                                default_props={'source': 'ensembl'})

        # RelationshipSets: Gene -CODES-> Transcript -CODES-> Protein
        self.gene_codes_transcript = RelationshipSet(
            'CODES', ['Gene'], ['Transcript'], ['sid'], ['sid'],
            default_props={'source': 'ensembl'})
        self.transcript_codes_protein = RelationshipSet(
            'CODES', ['Transcript'], ['Protein'], ['sid'], ['sid'],
            default_props={'source': 'ensembl'})
예제 #3
0
    def __init__(self):
        """Set up NodeSets and RelationshipSets for NCBI gene data."""
        super(NcbiGeneParser, self).__init__()

        # arguments
        self.arguments = ['taxid']

        # output data
        # Both gene IDs and gene symbols carry the label 'Gene'.  Two
        # separate NodeSets are used because only the symbol nodes need
        # the taxid in their merge keys for uniqueness.
        self.genes = NodeSet(['Gene'], merge_keys=['sid'],
                             default_props={'source': 'ncbigene'})
        self.genesymbols = NodeSet(
            ['Gene'], merge_keys=['sid', 'taxid'],
            default_props={'source': 'ncbigene', 'type': 'symbol'})
        self.genesymbol_synonym_genesymbol = RelationshipSet(
            'SYNONYM', ['Gene'], ['Gene'], ['sid', 'taxid'], ['sid', 'taxid'],
            default_props={'source': 'ncbigene'})
        self.gene_maps_genesymbol = RelationshipSet(
            'MAPS', ['Gene'], ['Gene'], ['sid'], ['sid', 'taxid'],
            default_props={'source': 'ncbigene'})
예제 #4
0
    def __init__(self):
        """
        Define the GTEx tissue/sample NodeSets and the MEASURES/PARENT
        RelationshipSets linking them.
        """
        super(GtexMetadataParser, self).__init__()

        # NodeSets
        self.tissues = NodeSet(['GtexTissue'], merge_keys=['name'])
        self.detailed_tissues = NodeSet(['GtexDetailedTissue'],
                                        merge_keys=['name'])
        self.sample = NodeSet(['GtexSample'], merge_keys=['sid'])

        # RelationshipSets
        self.sample_measures_tissue = RelationshipSet('MEASURES',
                                                      ['GtexSample'],
                                                      ['GtexTissue'], ['sid'],
                                                      ['name'])
        self.sample_measures_detailed_tissue = RelationshipSet(
            'MEASURES', ['GtexSample'], ['GtexDetailedTissue'], ['sid'],
            ['name'])
        self.tissue_parent_detailed_tissue = RelationshipSet(
            'PARENT', ['GtexTissue'], ['GtexDetailedTissue'], ['name'],
            ['name'])
        # avoid duplicate PARENT relationships
        self.tissue_parent_detailed_tissue.unique = True
예제 #5
0
    def __init__(self):
        """Containers for LNCipedia genes, transcripts and mapping relations."""
        super(LncipediaParser, self).__init__()

        self.genes = NodeSet(['Gene'], merge_keys=['sid'])
        self.transcripts = NodeSet(['Transcript'], merge_keys=['sid'])
        self.gene_codes_transcripts = RelationshipSet(
            'CODES', ['Gene'], ['Transcript'], ['sid'], ['sid'])
        self.gene_maps_gene = RelationshipSet(
            'MAPS', ['Gene'], ['Gene'], ['sid'], ['sid'])
        self.transcript_maps_transcript = RelationshipSet(
            'MAPS', ['Transcript'], ['Transcript'], ['sid'], ['sid'])
예제 #6
0
    def __init__(self):
        """Define NodeSets for RefSeq transcripts and proteins."""
        super(RefseqEntityParser, self).__init__()

        # arguments
        self.arguments = ['taxid']

        # define NodeSet and RelationshipSet
        self.transcripts = NodeSet(['Transcript'], merge_keys=['sid'],
                                   default_props={'source': 'refseq'})
        self.proteins = NodeSet(['Protein'], merge_keys=['sid'],
                                default_props={'source': 'refseq'})
예제 #7
0
    def __init__(self):
        """Minimal test parser: two NodeSets and one RelationshipSet."""
        super(DummyParser, self).__init__()

        # arguments
        self.arguments = ['taxid']

        # output data
        self.dummy_nodes = NodeSet(['Dummy'], merge_keys=['sid'])
        self.fummy_nodes = NodeSet(['Fummy'], merge_keys=['sid'])
        self.dummy_knows_fummy = RelationshipSet(
            'KNOWS', ['Dummy'], ['Fummy'], ['sid'], ['sid'])
예제 #8
0
def load_wpp_data(base_path, graph):
    """
    Load UN population data (WPP 2019) into Neo4j.

    Creates Country and AgeGroup nodes plus CURRENT_TOTAL / CURRENT_MALE /
    CURRENT_FEMALE relationships carrying the 2019 population counts.

    :param base_path: Path where file was downloaded.
    :param graph: Graph instance used for the final merge.
    """
    un_wpp_csv_file = os.path.join(base_path, 'WPP2019_PopulationByAgeSex_Medium.csv')
    log.info('Parse UN population data file: {}'.format(un_wpp_csv_file))

    country = NodeSet(['Country'], ['name'])
    age_group_nodes = NodeSet(['AgeGroup'], ['group'])
    country_total_group = RelationshipSet('CURRENT_TOTAL', ['Country'], ['AgeGroup'], ['name'], ['group'])
    country_male_group = RelationshipSet('CURRENT_MALE', ['Country'], ['AgeGroup'], ['name'], ['group'])
    country_female_group = RelationshipSet('CURRENT_FEMALE', ['Country'], ['AgeGroup'], ['name'], ['group'])

    # track nodes already created so each is added only once
    countries_added = set()
    age_groups_added = set()

    with open(un_wpp_csv_file, 'rt') as f:
        csv_file = csv.reader(f, delimiter=',', quotechar='"')
        # skip header
        next(csv_file)
        for row in csv_file:
            # LocID,Location,VarID,Variant,Time,MidPeriod,AgeGrp,AgeGrpStart,AgeGrpSpan,PopMale,PopFemale,PopTotal
            loc_id = row[0]
            location = row[1]
            time = int(row[4])
            age_group = row[6]
            age_group_start = int(row[7])
            age_group_span = row[8]
            # population counts are given in thousands
            pop_male = int(float((row[9])) * 1000)
            pop_female = int(float((row[10])) * 1000)
            pop_total = int(float((row[11])) * 1000)

            # only take 2019
            if time == 2019:
                if location not in countries_added:
                    country.add_node({'name': location, 'un_id': loc_id})
                    countries_added.add(location)
                if age_group not in age_groups_added:
                    age_group_nodes.add_node({'group': age_group, 'start': age_group_start, 'span': age_group_span})
                    # FIX: record the age group; previously the set was never
                    # updated, so a duplicate AgeGroup node was appended for
                    # every single data row.
                    age_groups_added.add(age_group)

                country_total_group.add_relationship({'name': location}, {'group': age_group}, {'count': pop_total})
                country_male_group.add_relationship({'name': location}, {'group': age_group}, {'count': pop_male})
                country_female_group.add_relationship({'name': location}, {'group': age_group}, {'count': pop_female})

    log.info('Load data to Neo4j')
    country.merge(graph)
    age_group_nodes.merge(graph)
    country_total_group.merge(graph)
    country_male_group.merge(graph)
    country_female_group.merge(graph)
예제 #9
0
def read_daily_report_data_csv_JHU(file):
    """
    Extract data from a single daily report file from JHU.

    :param file: Path to the CSV file
    :return: (countries, provinces, updates, province_in_country, province_rep_update)
    """
    log.info('Read JHU CSV file {}'.format(file))

    countries = NodeSet(['Country'], ['name'])
    provinces = NodeSet(['Province'], ['name'])
    updates = NodeSet(['DailyReport'], ['uuid'])
    province_in_country = RelationshipSet('PART_OF', ['Province'], ['Country'], ['name'], ['name'])
    province_in_country.unique = True
    province_rep_update = RelationshipSet('REPORTED', ['Province'], ['DailyReport'], ['name'], ['uuid'])

    with open(file, 'rt') as handle:
        reader = csv.reader(handle, delimiter=',', quotechar='"')
        next(reader)  # skip the header row

        for record in reader:
            country = record[1]
            # fall back to a synthetic name when the province field is empty
            province = record[0] or '{}_complete'.format(country)

            date = parse(record[2])
            uuid = country + province + str(date)
            confirmed = int(record[3]) if record[3] else 'na'
            death = int(record[4]) if record[4] else 'na'
            recovered = int(record[5]) if record[5] else 'na'

            # coordinates are only present in some files
            lat = record[6] if len(record) >= 7 else None
            lon = record[7] if len(record) >= 8 else None

            province_dict = {'name': province}
            if lat and lon:
                province_dict['latitude'] = lat
                province_dict['longitude'] = lon
            provinces.add_unique(province_dict)

            countries.add_unique({'name': country})

            updates.add_unique(
                {'date': date, 'confirmed': confirmed, 'death': death, 'recovered': recovered, 'uuid': uuid})

            province_in_country.add_relationship({'name': province}, {'name': country}, {'source': 'jhu'})
            province_rep_update.add_relationship({'name': province}, {'uuid': uuid}, {'source': 'jhu'})

    return countries, provinces, updates, province_in_country, province_rep_update
예제 #10
0
    def __init__(self):
        """Containers for removed (legacy) RefSeq records and their replacements."""
        super(RefseqRemovedRecordsParser, self).__init__()

        self.arguments = ['taxid']

        # legacy record IDs collected while parsing
        self.legacy_ids = set()

        self.legacy_transcripts = NodeSet(
            ['Transcript', 'Legacy'], merge_keys=['sid'],
            default_props={'source': 'refseq'})
        self.legacy_transcript_now_transcript = RelationshipSet(
            'REPLACED_BY', ['Transcript'], ['Transcript'], ['sid'], ['sid'],
            default_props={'source': 'refseq'})
        self.legacy_proteins = NodeSet(
            ['Protein', 'Legacy'], merge_keys=['sid'],
            default_props={'source': 'refseq'})
        self.legacy_protein_now_protein = RelationshipSet(
            'REPLACED_BY', ['Protein'], ['Protein'], ['sid'], ['sid'],
            default_props={'source': 'refseq'})
        self.gene_codes_legacy_transcript = RelationshipSet(
            'CODES', ['Gene'], ['Transcript', 'Legacy'], ['sid'], ['sid'],
            default_props={'source': 'refseq'})
        self.legacy_transcript_codes_protein = RelationshipSet(
            'CODES', ['Transcript', 'Legacy'], ['Protein'], ['sid'], ['sid'],
            default_props={'source': 'refseq'})
예제 #11
0
    def __init__(self):
        """NodeSets and RelationshipSets for miRBase precursor/mature miRNAs."""
        super(MirbaseParser, self).__init__()

        # NodeSets
        self.precursor_mirna = NodeSet(['PrecursorMirna'], merge_keys=['sid'])
        self.mature_mirna = NodeSet(['Mirna'], merge_keys=['sid'])

        # RelationshipSets
        self.precursor_codes_mature = RelationshipSet(
            'PRE', ['PrecursorMirna'], ['Mirna'], ['sid'], ['sid'])
        self.transcript_codes_precursor = RelationshipSet(
            'IS', ['Transcript'], ['PrecursorMirna'], ['sid'], ['sid'])
        self.gene_is_precursor = RelationshipSet(
            'IS', ['Gene'], ['PrecursorMirna'], ['sid'], ['sid'])
예제 #12
0
    def __init__(self):
        """Define the single NodeSet of words, merged on their value."""
        super(BigWordListParser, self).__init__()

        # NodeSets
        self.words = NodeSet(['Word'], merge_keys=['value'])
 def _add_node(self, node):
     """Register *node* in the NodeSet matching its label combination.

     A NodeSet is created lazily the first time a label combination is
     seen; its merge keys are derived from the node itself.

     :param node: Node object with ``labels`` readable as an iterable.
     """
     # create nodeSet if necessary
     labels = frozenset(node.labels)
     # PEP 8 idiom: 'x not in y' instead of 'not x in y'
     if labels not in self.nodeSets:
         # get primary keys
         self.nodeSets[labels] = NodeSet(
             list(labels), merge_keys=self._get_merge_keys(node))
     # add node to nodeset
     self.nodeSets[labels].nodes.append(node)
예제 #14
0
    def __init__(self):
        """MeSH descriptors, qualifiers, concepts, terms and their links."""
        super(MeshParser, self).__init__()

        # NodeSets, all merged on 'sid'
        self.descriptor = NodeSet(['MeshDescriptor'], merge_keys=['sid'])
        self.qualifier = NodeSet(['MeshQualifier'], merge_keys=['sid'])
        self.concept = NodeSet(['MeshConcept'], merge_keys=['sid'])
        self.term = NodeSet(['MeshTerm'], merge_keys=['sid'])

        self.descriptor_allowed_qualifier = RelationshipSet(
            'ALLOWED', ['MeshDescriptor'], ['MeshQualifier'], ['sid'], ['sid'])

        # the unique flag prevents duplicate HAS / RELATED relationships
        self.descriptor_has_concept = RelationshipSet(
            'HAS', ['MeshDescriptor'], ['MeshConcept'], ['sid'], ['sid'])
        self.descriptor_has_concept.unique = True
        self.concept_has_term = RelationshipSet(
            'HAS', ['MeshConcept'], ['MeshTerm'], ['sid'], ['sid'])
        self.concept_has_term.unique = True
        self.concept_related_concept = RelationshipSet(
            'RELATED', ['MeshConcept'], ['MeshConcept'], ['sid'], ['sid'])
        self.concept_related_concept.unique = True
예제 #15
0
    def __init__(self):
        """Legacy NCBI gene nodes and their REPLACED_BY links to current genes."""
        super(NcbiLegacyGeneParser, self).__init__()

        self.arguments = ['taxid']

        self.legacy_genes = NodeSet(
            ['Gene', 'Legacy'], merge_keys=['sid'],
            default_props={'source': 'ncbigene'})
        self.legacy_gene_now_gene = RelationshipSet(
            'REPLACED_BY', ['Gene', 'Legacy'], ['Gene'], ['sid'], ['sid'],
            default_props={'source': 'ncbigene'})
예제 #16
0
    def __init__(self):
        """Lipid NodeSet plus the SwissLipids relationship containers."""
        super(SwissLipidsParser, self).__init__()

        # define NodeSet and RelationshipSet
        self.lipids = NodeSet(['Lipid'], merge_keys=['sid'])

        self.lipid_fromclass_lipid = RelationshipSet(
            'FROM_LIPID_CLASS', ['Lipid'], ['Lipid'], ['sid'], ['sid'])
        self.lipid_parent_lipid = RelationshipSet(
            'HAS_PARENT', ['Lipid'], ['Lipid'], ['sid'], ['sid'])
        self.lipid_component_lipid = RelationshipSet(
            'HAS_COMPONENT', ['Lipid'], ['Lipid'], ['sid'], ['sid'])
        self.lipid_maps_metabolite = RelationshipSet(
            'MAPS', ['Lipid'], ['Metabolite'], ['sid'], ['sid'])
        self.lipid_associates_protein = RelationshipSet(
            'HAS_ASSOCIATION', ['Lipid'], ['Protein'], ['sid'], ['sid'])
예제 #17
0
    def __init__(self):
        """
        Define the Locus NodeSet, merged on 'uuid' and tagged with the
        ENSEMBL data source.
        """
        super(EnsemblLocusParser, self).__init__()

        # arguments
        self.arguments = ['taxid']

        # NodeSets
        self.locus = NodeSet(['Locus'],
                             merge_keys=['uuid'],
                             default_props={'source': 'ensembl'})
예제 #18
0
    def __init__(self):
        """HMDB metabolite NodeSet and its mapping/association relationships."""
        super(HmdbParser, self).__init__()

        # NodeSets
        self.metabolites = NodeSet(
            ['Metabolite'], merge_keys=['sid'],
            default_props={'source': 'hmdb'})

        # RelationshipSets
        self.metabolite_map_metabolite = RelationshipSet(
            'MAPS', ['Metabolite'], ['Metabolite'], ['sid'], ['sid'],
            default_props={'source': 'hmdb'})
        self.metabolite_associates_protein = RelationshipSet(
            'HAS_ASSOCIATION', ['Metabolite'], ['Protein'], ['sid'], ['sid'],
            default_props={'source': 'hmdb'})
예제 #19
0
    def __init__(self):
        """ChEBI metabolite NodeSet plus IS_A/CHEBI_REL/MAPS relationships."""
        super(ChebiParser, self).__init__()

        # NodeSets
        self.metabolites = NodeSet(
            ['Metabolite'], merge_keys=['sid'],
            default_props={'source': 'chebi'})

        # RelationshipSets, all metabolite-to-metabolite
        self.metabolite_isa_metabolite = RelationshipSet(
            'IS_A', ['Metabolite'], ['Metabolite'], ['sid'], ['sid'],
            default_props={'source': 'chebi'})
        self.metabolite_rel_metabolite = RelationshipSet(
            'CHEBI_REL', ['Metabolite'], ['Metabolite'], ['sid'], ['sid'],
            default_props={'source': 'chebi'})
        self.metabolite_maps_metabolite = RelationshipSet(
            'MAPS', ['Metabolite'], ['Metabolite'], ['sid'], ['sid'],
            default_props={'source': 'chebi'})
예제 #20
0
    def __init__(self):
        """
        Define the Protein NodeSet and the PRIMARY/CODES/MAPS
        RelationshipSets for the UniProt knowledgebase.
        """
        super(UniprotKnowledgebaseParser, self).__init__()

        # arguments
        self.arguments = ['taxid']

        # NodeSet
        self.proteins = NodeSet(['Protein'], merge_keys=['sid'], default_props={'source': 'uniprot'})

        # RelationshipSet
        self.protein_primary_protein = RelationshipSet('PRIMARY', ['Protein'], ['Protein'], ['sid'], ['sid'], default_props={'source': 'uniprot'})
        self.transcript_codes_protein = RelationshipSet('CODES', ['Transcript'], ['Protein'], ['sid'], ['sid'], default_props={'source': 'uniprot'})
        self.protein_maps_protein = RelationshipSet('MAPS', ['Protein'], ['Protein'], ['sid'], ['sid'], default_props={'source': 'uniprot'})
예제 #21
0
    def __init__(self):
        """
        Define the HGNC Gene NodeSet and the MAPS relationships to genes
        and gene symbols.
        """
        super(HGNCParser, self).__init__()

        # output data
        self.genes = NodeSet(['Gene'], merge_keys=['sid'])

        self.gene_maps_gene = RelationshipSet('MAPS', ['Gene'], ['Gene'],
                                              ['sid'], ['sid'])
        # gene symbols are matched on both sid and taxid
        self.gene_maps_genesymbol = RelationshipSet('MAPS', ['Gene'],
                                                    ['GeneSymbol'], ['sid'],
                                                    ['sid', 'taxid'])
    def _define_node_and_relatinship_sets(self):
        """Build the NodeSet/RelationshipSet registries for paper documents.

        NOTE(review): the method name contains a typo ('relatinship') but is
        kept unchanged because callers reference it by this name.
        """
        hub_label = config.JSON2GRAPH_COLLECTION_NODE_LABEL

        # Define nodesets
        nodeSets = {
            "Papers": NodeSet(["Paper"], ["paper_id"]),
            "PaperIDHubs": NodeSet([self.id_node_label, hub_label], ["id"]),
            "Metadata": NodeSet(["Metadata"], ["_hash_id"]),
            "Authors": NodeSet(["Author"], ["_hash_id"]),
            "AuthorHubs": NodeSet(["Author", hub_label], ["id"]),
            "Abstracts": NodeSet(["Abstract"], ["_hash_id"]),
            "AbstractHubs": NodeSet(["Abstract", hub_label], ["id"]),
        }

        # Define relationship sets (paper -> hubs -> leaf nodes)
        relSets = {
            "PAPER_HAS_PAPERID_COLLECTION": RelationshipSet(
                rel_type="PAPER_HAS_PAPERID_COLLECTION",
                start_node_labels=["Paper"],
                end_node_labels=[self.id_node_label, hub_label],
                start_node_properties=["paper_id"],
                end_node_properties=["id"],
            ),
            "PAPER_HAS_METADATA": RelationshipSet(
                rel_type="PAPER_HAS_METADATA",
                start_node_labels=["Paper"],
                end_node_labels=["Metadata"],
                start_node_properties=["paper_id"],
                end_node_properties=["_hash_id"],
            ),
            "METADATA_HAS_AUTHORHUB": RelationshipSet(
                rel_type="METADATA_HAS_AUTHOR",
                start_node_labels=["Metadata"],
                end_node_labels=["Author", hub_label],
                start_node_properties=["_hash_id"],
                end_node_properties=["id"],
            ),
            "AUTHORHUB_HAS_AUTHOR": RelationshipSet(
                rel_type="AUTHOR_HAS_AUTHOR",
                start_node_labels=["Author", hub_label],
                end_node_labels=["Author"],
                start_node_properties=["id"],
                end_node_properties=["_hash_id"],
            ),
            "PAPER_HAS_ABSTRACTHUB": RelationshipSet(
                rel_type="PAPER_HAS_ABSTRACT",
                start_node_labels=["Paper"],
                end_node_labels=["Abstract", hub_label],
                start_node_properties=["paper_id"],
                end_node_properties=["id"],
            ),
            "ABSTRACTHUB_HAS_ABSTRACT": RelationshipSet(
                rel_type="ABSTRACT_HAS_ABSTRACT",
                start_node_labels=["Abstract", hub_label],
                end_node_labels=["Abstract"],
                start_node_properties=["id"],
                end_node_properties=["_hash_id"],
            ),
        }

        # Define id nodes and relations:
        # one NodeSet plus one linking RelationshipSet per configured ID column
        for col_name, node_props in self.id_columns.items():
            nodeSets[node_props["label"]] = NodeSet(
                [self.id_node_label, node_props["label"]], ["id"])
            relSets[node_props["label"]] = RelationshipSet(
                rel_type="PAPERID_COLLECTION_HAS_PAPERID",
                start_node_labels=[self.id_node_label, hub_label],
                end_node_labels=[self.id_node_label, node_props["label"]],
                start_node_properties=["id"],
                end_node_properties=[node_props["attr"]],
            )

        self.nodeSets = nodeSets
        self.relSets = relSets
예제 #23
0
# Neo4j connection credentials (redacted).
NEO4J_USER = '******'
NEO4J_PASSWORD = '******'

# Connect and run a trivial query to verify the connection works.
graph = py2neo.Graph(host=NEO4J_HOST, user=NEO4J_USER, password=NEO4J_PASSWORD)
graph.run("MATCH (a) RETURN a LIMIT 1")

# Download file from NCBI FTP Server
print('Download file from NCBI FTP server')
with urlopen(
        'ftp://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/Mammalia/Homo_sapiens.gene_info.gz'
) as r:
    with open(DOWNLOAD_FILE_PATH, 'wb') as f:
        shutil.copyfileobj(r, f)

# define NodeSet and RelationshipSet
# NOTE(review): both NodeSets use the label 'Gene' merged on 'gene_id';
# the MAPS RelationshipSet links NCBI genes to ENSEMBL genes.
ncbi_gene_nodes = NodeSet(['Gene'], ['gene_id'])
ensembl_gene_nodes = NodeSet(['Gene'], ['gene_id'])
gene_mapping_rels = RelationshipSet('MAPS', ['Gene'], ['Gene'], ['gene_id'],
                                    ['gene_id'])

# iterate the data file and extract nodes/relationships
print('Iterate file and create nodes/relationships')
# collect mapped ENSEMBL gene IDs to avoid duplicate genes
ensembl_gene_ids_added = set()

with gzip.open(DOWNLOAD_FILE_PATH, 'rt') as file:
    # skip header line
    next(file)
    # iterate file
    for line in file:
        # tab-separated NCBI gene_info record; processing of the fields
        # continues beyond this excerpt
        fields = line.strip().split('\t')
예제 #24
0
            def __init__(self):
                """Set up the single NodeSet produced by this test parser."""
                super(SimpleTestYieldParser, self).__init__()

                # yielded source nodes, merged on their 'uid' property
                self.source = NodeSet(['YieldSource'], merge_keys=['uid'])
예제 #25
0
    def __init__(self):
        """Source/Target NodeSets used by the root test parser."""
        super(RootTestParser, self).__init__()

        # nodes merged on their respective ID properties
        self.source = NodeSet(['Source'], merge_keys=['source_id'])
        self.target = NodeSet(['Target'], merge_keys=['target_id'])
예제 #26
0
def read_daily_report_data_csv_JHU(file):
    """
    Extract data from a single daily report file from JHU.

    Old format (until 03-21-2020)
        Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude
    New format:
        FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key

    :param file: Path to the CSV file
    :return: (countries, provinces, updates, province_in_country, province_rep_update)
    """
    log.info('Read JHU CSV file {}'.format(file))

    countries = NodeSet(['Country'], ['name'])
    provinces = NodeSet(['Province'], ['name'])
    updates = NodeSet(['DailyReport'], ['uuid'])
    province_in_country = RelationshipSet('PART_OF', ['Province'], ['Country'],
                                          ['name'], ['name'])
    province_in_country.unique = True
    province_rep_update = RelationshipSet('REPORTED', ['Province'],
                                          ['DailyReport'], ['name'], ['uuid'])

    with open(file, 'rt') as handle:
        reader = csv.reader(handle, delimiter=',', quotechar='"')
        # the header length tells the old (8 column) and new (12 column)
        # file formats apart
        header = next(reader)
        file_type = 'new' if len(header) > 8 else 'old'
        log.info("File type: {}".format(file_type))

        for record in reader:
            # delegate row parsing to the format-specific helper
            if file_type == 'old':
                parsed = parse_jhu_old_file_row(record)
            else:
                parsed = parse_jhu_new_file_row(record)
            country, province, date, confirmed, death, recovered, lat, lon = parsed

            province_dict = {'name': province}
            if lat and lon:
                province_dict['latitude'] = lat
                province_dict['longitude'] = lon

            uuid = country + province + str(date)

            provinces.add_unique(province_dict)

            countries.add_unique({'name': country})

            updates.add_unique({
                'date': date,
                'confirmed': confirmed,
                'death': death,
                'recovered': recovered,
                'uuid': uuid
            })

            province_in_country.add_relationship({'name': province},
                                                 {'name': country},
                                                 {'source': 'jhu'})
            province_rep_update.add_relationship({'name': province},
                                                 {'uuid': uuid},
                                                 {'source': 'jhu'})

    return countries, provinces, updates, province_in_country, province_rep_update