Beispiel #1
0
    def test_get_from_neo_with_unlimited(self):
        """Check ``get_from_neo`` with ``limit=None``.

        The first query carries no LIMIT clause and must return more than
        1000 rows; the second carries its own ``LIMIT 5`` and must return
        exactly five rows.
        """
        unbounded_query = 'MATCH (n:Tweet) WHERE n.id>1000 RETURN n.id'
        all_rows = Neo4jDataAccess(neo4j_creds=self.creds).get_from_neo(
            unbounded_query, limit=None)
        assert len(all_rows) > 1000

        capped_query = 'MATCH (n:Tweet) WHERE n.hydrated=\'FULL\' RETURN n.id LIMIT 5'
        capped_rows = Neo4jDataAccess(neo4j_creds=self.creds).get_from_neo(
            capped_query, limit=None)
        assert len(capped_rows) == 5
Beispiel #2
0
 def test_save_enrichment_df_to_graph_wrong_parameter_types(self):
     """Verify ``save_enrichment_df_to_graph`` rejects badly typed arguments.

     Two calls are made, each expected to raise ``TypeError``:

     * a plain string where a ``Neo4jDataAccess.NodeLabel`` is passed in
       the valid call; the message must mention ``"label parameter"``.
     * a list where a ``pd.DataFrame`` is passed in the valid call; the
       message must mention ``"Pandas.DataFrame"``.
     """
     # The return value is deliberately not bound: the call must raise.
     with pytest.raises(TypeError) as label_error:
         Neo4jDataAccess(neo4j_creds=self.creds).save_enrichment_df_to_graph(
             'test', pd.DataFrame(), 'test')
     assert "label parameter" in str(label_error.value)

     with pytest.raises(TypeError) as frame_error:
         Neo4jDataAccess(neo4j_creds=self.creds).save_enrichment_df_to_graph(
             Neo4jDataAccess.NodeLabel.Tweet, [], 'test')
     assert "Pandas.DataFrame" in str(frame_error.value)
Beispiel #3
0
    def test_save_enrichment_df_to_graph_new_nodes(self):
        """Save two tweet rows through the enrichment path and read them back.

        The rows are looked up again by their ids and the returned frame must
        hold both texts, in the order they were saved.
        """
        rows = [
            {'id': 555, 'text': 'Tweet 123'},
            {'id': 666, 'text': 'Tweet 234'},
        ]
        enrichment = pd.DataFrame(rows)
        writer = Neo4jDataAccess(neo4j_creds=self.creds)
        writer.save_enrichment_df_to_graph(
            Neo4jDataAccess.NodeLabel.Tweet, enrichment, 'test')

        reader = Neo4jDataAccess(neo4j_creds=self.creds)
        stored = reader.get_tweet_by_id(enrichment['id'].to_frame())
        assert len(stored) == 2
        for position, expected_text in enumerate(['Tweet 123', 'Tweet 234']):
            assert stored.at[position, 'text'] == expected_text
Beispiel #4
0
    def test_save_enrichment_df_to_graph_multiple_properties(self):
        """Save tweet rows carrying two properties and read both back.

        Each row has a ``text`` and a ``favorite_count``; after saving, the
        rows are fetched by id and every property is compared per row.
        """
        expected_rows = [
            {'id': 777, 'text': 'Tweet 123', 'favorite_count': 2},
            {'id': 888, 'text': 'Tweet 234', 'favorite_count': 3},
        ]
        enrichment = pd.DataFrame(expected_rows)
        writer = Neo4jDataAccess(neo4j_creds=self.creds)
        writer.save_enrichment_df_to_graph(
            Neo4jDataAccess.NodeLabel.Tweet, enrichment, 'test')

        reader = Neo4jDataAccess(neo4j_creds=self.creds)
        stored = reader.get_tweet_by_id(enrichment['id'].to_frame())
        assert len(stored) == 2
        for position, expected in enumerate(expected_rows):
            # Same check order as the row layout: text first, then the count.
            assert stored.at[position, 'text'] == expected['text']
            assert stored.at[position, 'favorite_count'] == expected['favorite_count']
Beispiel #5
0
 def test_save_parquet_to_graph(self):
     """Smoke test: load the parquet sample next to this file and save it.

     There is no check on the stored data; the test passes as long as
     reading the file and ``save_parquet_df_to_graph`` do not raise.
     """
     here = os.path.dirname(__file__)
     parquet_path = os.path.join(
         here, 'data/2020_03_22_02_b1.snappy2.parquet')
     tweets = pd.read_parquet(parquet_path, engine='pyarrow')
     accessor = Neo4jDataAccess(neo4j_creds=self.creds)
     accessor.save_parquet_df_to_graph(tweets, 'dave')
     # Reaching this line without an exception is the only success criterion.
     assert True
Beispiel #6
0
    def test_save_enrichment_df_to_graph_property_does_not_exist(self):
        """Saving a row with the ``new_prop`` column must raise.

        The raised exception's message is expected to contain
        ``"create_propertykey"``.
        """
        row = {'id': 777, 'text': 'Tweet 123', 'new_prop': 2}
        enrichment = pd.DataFrame([row])
        with pytest.raises(Exception) as raised:
            # The accessor is built inside the context, as in the original,
            # so the set of calls covered by ``raises`` stays the same.
            accessor = Neo4jDataAccess(neo4j_creds=self.creds)
            accessor.save_enrichment_df_to_graph(
                Neo4jDataAccess.NodeLabel.Tweet, enrichment, 'test')

        message = str(raised.value)
        assert "create_propertykey" in message
Beispiel #7
0
 def test_get_tweet_by_id(self):
     """Fetch the tweet with id 1 and check its id, text and hydrated values."""
     lookup = pd.DataFrame([{'id': 1}])
     found = Neo4jDataAccess(neo4j_creds=self.creds).get_tweet_by_id(lookup)
     assert len(found) == 1
     expected = {'id': 1, 'text': 'Tweet 1', 'hydrated': 'FULL'}
     for column, value in expected.items():
         assert found.at[0, column] == value
Beispiel #8
0
 def test_get_tweet_by_id_with_cols(self):
     """Fetch tweet 1 with ``cols=['id', 'text']`` and expect only two columns."""
     lookup = pd.DataFrame([{"id": 1}])
     accessor = Neo4jDataAccess(neo4j_creds=self.creds)
     found = accessor.get_tweet_by_id(lookup, cols=['id', 'text'])
     assert len(found) == 1
     assert len(found.columns) == 2
     for column, value in (('id', 1), ('text', 'Tweet 1')):
         assert found.at[0, column] == value
Beispiel #9
0
    def test_get_tweet_hydrated_status_by_id(self):
        """Check the hydrated status returned for the ids in ``self.ids``.

        Five rows are expected. For each id the ``hydrated`` value is read
        with the row label it is expected to sit at (0 through 4), so the
        test also pins the row order of the returned frame.
        """
        df = Neo4jDataAccess(
            neo4j_creds=self.creds).get_tweet_hydrated_status_by_id(self.ids)

        assert len(df) == 5
        assert df[df['id'] == 1]['hydrated'][0] == 'FULL'
        assert df[df['id'] == 2]['hydrated'][1] == 'FULL'
        # Identity check: comparisons to the None singleton use ``is``.
        assert df[df['id'] == 3]['hydrated'][2] is None
        assert df[df['id'] == 4]['hydrated'][3] == 'PARTIAL'
        assert df[df['id'] == 5]['hydrated'][4] == 'PARTIAL'
Beispiel #10
0
 def test_get_tweet_by_id_wrong_parameter(self):
     """Passing a string to ``get_tweet_by_id`` must raise ``TypeError``.

     The error message is expected to mention ``"df"``.
     """
     # The return value is deliberately not bound: the call must raise.
     with pytest.raises(TypeError) as excinfo:
         Neo4jDataAccess(
             neo4j_creds=self.creds).get_tweet_by_id('test')
     assert "df" in str(excinfo.value)
Beispiel #11
0
 def test_get_from_neo_with_limit_only(self):
     """Run a query without a LIMIT clause and pass ``limit=1`` instead.

     Exactly one row with the two returned columns is expected.
     """
     cypher = 'MATCH (n:Tweet) WHERE n.hydrated=\'FULL\' RETURN n.id, n.text'
     accessor = Neo4jDataAccess(neo4j_creds=self.creds)
     result = accessor.get_from_neo(cypher, limit=1)
     assert len(result) == 1
     assert len(result.columns) == 2
Beispiel #12
0
 def test_get_from_neo(self):
     """Run a query that carries its own ``LIMIT 5`` without passing ``limit``.

     Five rows with the two returned columns are expected.
     """
     cypher = 'MATCH (n:Tweet) WHERE n.hydrated=\'FULL\' RETURN n.id, n.text LIMIT 5'
     accessor = Neo4jDataAccess(neo4j_creds=self.creds)
     result = accessor.get_from_neo(cypher)
     assert len(result) == 5
     assert len(result.columns) == 2
Beispiel #13
0
                          1219746235038474243,
                          1219746508955967488,
                          1219746544955453441]
                   })
# Root logger threshold. Available levels, least to most severe:
# DEBUG, INFO, WARNING, ERROR, CRITICAL
logging.getLogger().setLevel(logging.WARNING)
# Widen pandas' console output: show up to 500 columns and do not cap the
# width of individual column values when frames are printed below.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', None)


# Test save_parquet_df_to_graph
# The path is relative, so this depends on the current working directory.
print('----')
print('Testing Parquet Save')
tdf = pd.read_parquet(
    './data/2020_03_22_02_b1.snappy2.parquet', engine='pyarrow')
Neo4jDataAccess().save_parquet_df_to_graph(tdf, 'dave')
print('----')

# Test get_tweet_hydrated_status_by_id
# `df` is the frame built above this point in the script.
# NOTE: `df` is rebound to the result here, so the get_tweet_by_id call
# below receives this result rather than the original frame.
print('----')
print('Testing get_tweet_hydrated_status_by_id')
df = Neo4jDataAccess().get_tweet_hydrated_status_by_id(df)
print(df)

# Test get_tweet_by_id
print('----')
print('Testing get_tweet_by_id')
df = Neo4jDataAccess().get_tweet_by_id(df)
print(df)
print('----')
print('Testing get_tweet_by_id for cols')
Beispiel #14
0
 def test_get_neo4j_graph_wrong_parameter(self):
     """Passing a string to ``get_neo4j_graph`` must raise ``TypeError``.

     The error message is expected to mention ``"role_type parameter"``.
     """
     # The return value is deliberately not bound: the call must raise.
     with pytest.raises(TypeError) as excinfo:
         Neo4jDataAccess(
             neo4j_creds=self.creds).get_neo4j_graph('test')
     assert "role_type parameter" in str(excinfo.value)
Beispiel #15
0
 def test_get_neo4j_graph(self):
     """Request a graph for the READER role and expect a non-None result."""
     accessor = Neo4jDataAccess(neo4j_creds=self.creds)
     graph = accessor.get_neo4j_graph(Neo4jDataAccess.RoleType.READER)
     assert graph is not None
from modules.IngestDrugSynonyms import IngestDrugSynonyms
from modules.DrugSynonymDataToNeo4j import DrugSynonymDataToNeo4j
from modules.Neo4jDataAccess import Neo4jDataAccess

import logging

# Prefix every log message with '>>> ' and emit INFO level and above.
logging.basicConfig(format='>>> %(message)s', level=logging.INFO)

# Stage 1: build the in-memory data set on the IngestDrugSynonyms object.
# The attributes read further down (drugs, synonyms, drug_synonym_rels,
# all_studies_df, appeared_in_edges, urls, url_points_at_study_edges) are
# presumably populated by these three calls - confirm against the module.
drugSynonym = IngestDrugSynonyms()
drugSynonym.auto_get_and_clean_data()
drugSynonym.create_drug_study_links()
drugSynonym.create_url_study_links()

# Stage 2: open a graph handle with the WRITER role and hand it to the
# bridge object that performs the merges below.
neo4jBridge = DrugSynonymDataToNeo4j(
    graph=Neo4jDataAccess().get_neo4j_graph(Neo4jDataAccess.RoleType.WRITER))

# Drugs and synonyms are merged before the relationships between them.
# NOTE(review): the ordering looks deliberate (nodes before edges) - confirm
# before reordering any of the merge calls in this script.
neo4jBridge.merge_drugs(drugSynonym.drugs)
neo4jBridge.merge_synonyms(drugSynonym.synonyms)
neo4jBridge.merge_drug_to_synonym_rels(drugSynonym.drug_synonym_rels)

# Studies are merged next, followed by the drug-to-study relationships.
neo4jBridge.merge_studies(drugSynonym.all_studies_df)

neo4jBridge.merge_drug_to_study_rels(drugSynonym.appeared_in_edges)

# URLs are merged last, followed by the url-to-study relationships.
neo4jBridge.merge_url(drugSynonym.urls)
neo4jBridge.merge_url_to_study_rels(drugSynonym.url_points_at_study_edges)