Code Example #1
File: test_gbq.py Project: AlexisMignon/pandas
    def test_read_gbq_with_corrupted_private_key_json_should_fail(self):
        _skip_if_no_private_key_path()

        with tm.assertRaises(gbq.InvalidPrivateKeyFormat):
            gbq.read_gbq(
                'SELECT 1', project_id='x',
                private_key=re.sub('[a-z]', '9', _get_private_key_path()))
Code Example #2
File: test_gbq.py Project: Alias4bb/pandas
    def test_read_gbq_with_corrupted_private_key_json_should_fail(self):
        _skip_if_no_private_key_contents()

        with tm.assertRaises(gbq.InvalidPrivateKeyFormat):
            gbq.read_gbq(
                'SELECT 1', project_id='x',
                private_key=re.sub('[a-z]', '9', PRIVATE_KEY_JSON_CONTENTS))
Code Example #3
File: test_gbq.py Project: Alias4bb/pandas
    def test_invalid_option_for_sql_dialect(self):
        sql_statement = "SELECT DISTINCT id FROM " \
                        "`publicdata.samples.wikipedia` LIMIT 10"

        # Test that an invalid option for `dialect` raises ValueError
        with tm.assertRaises(ValueError):
            gbq.read_gbq(sql_statement, project_id=PROJECT_ID,
                         dialect='invalid')

        # Test that a correct option for dialect succeeds
        # to make sure ValueError was due to invalid dialect
        gbq.read_gbq(sql_statement, project_id=PROJECT_ID,
                     dialect='standard')
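
For reference, a minimal sketch of how the `dialect` option is passed in ordinary, non-test use; the project ID below is a placeholder, not one taken from these examples. As the comments above note, 'legacy' is the historical default.

    from pandas.io import gbq

    # Placeholder project ID; substitute your own GCP project.
    df = gbq.read_gbq("SELECT 1 AS x",
                      project_id="my-project",
                      dialect="standard")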
Code Example #4
File: test_gbq.py Project: Alias4bb/pandas
    def test_standard_sql(self):
        standard_sql = "SELECT DISTINCT id FROM " \
                       "`publicdata.samples.wikipedia` LIMIT 10"

        # Test that a standard sql statement fails when using
        # the legacy SQL dialect (default value)
        with tm.assertRaises(gbq.GenericGBQException):
            gbq.read_gbq(standard_sql, project_id=PROJECT_ID)

        # Test that a standard sql statement succeeds when
        # setting dialect='standard'
        df = gbq.read_gbq(standard_sql, project_id=PROJECT_ID,
                          dialect='standard')
        self.assertEqual(len(df.drop_duplicates()), 10)
Code Example #5
File: test_gbq.py Project: Alias4bb/pandas
    def test_legacy_sql(self):
        legacy_sql = "SELECT id FROM [publicdata.samples.wikipedia] LIMIT 10"

        # Test that a legacy sql statement fails when
        # setting dialect='standard'
        with tm.assertRaises(gbq.GenericGBQException):
            gbq.read_gbq(legacy_sql, project_id=PROJECT_ID,
                         dialect='standard')

        # Test that a legacy sql statement succeeds when
        # setting dialect='legacy'
        df = gbq.read_gbq(legacy_sql, project_id=PROJECT_ID,
                          dialect='legacy')
        self.assertEqual(len(df.drop_duplicates()), 10)
Code Example #6
File: test_gbq.py Project: Sakampavankumar/pandas-1
    def test_upload_data(self):
        test_size = 1000001
        #create df to test for all BQ datatypes except RECORD
        bools = np.random.randint(2, size=(1, test_size)).astype(bool)
        flts = np.random.randn(1, test_size)
        ints = np.random.randint(1, 10, size=(1, test_size))
        strs = np.random.randint(1, 10, size=(1, test_size)).astype(str)
        times = [
            datetime.datetime.now(pytz.timezone('US/Arizona'))
            for t in xrange(test_size)
        ]
        df = DataFrame(
            {
                'bools': bools[0],
                'flts': flts[0],
                'ints': ints[0],
                'strs': strs[0],
                'times': times[0]
            },
            index=range(test_size))
        gbq.to_gbq(df,
                   "pydata_pandas_bq_testing.new_test",
                   project_id=PROJECT_ID,
                   chunksize=10000)
        sleep(60)  # <- Curses Google!!!

        result = gbq.read_gbq(
            "SELECT COUNT(*) as NUM_ROWS FROM pydata_pandas_bq_testing.new_test",
            project_id=PROJECT_ID)
        self.assertEqual(result['NUM_ROWS'][0], test_size)
Code Example #7
    def test_should_read_as_user_account(self):
        if _in_travis_environment():
            raise nose.SkipTest("Cannot run local auth in travis environment")

        query = 'SELECT "PI" as VALID_STRING'
        df = gbq.read_gbq(query, project_id=_get_project_id())
        tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']}))
Code Example #8
File: test_gbq.py Project: AlexisMignon/pandas
    def test_upload_data_if_table_exists_append(self):
        destination_table = DESTINATION_TABLE + "3"

        test_size = 10
        df = make_mixed_dataframe_v2(test_size)
        df_different_schema = tm.makeMixedDataFrame()

        # Initialize table with sample data
        gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000,
                   private_key=_get_private_key_path())

        # Test the if_exists parameter with value 'append'
        gbq.to_gbq(df, destination_table, _get_project_id(),
                   if_exists='append', private_key=_get_private_key_path())

        sleep(30)  # <- Curses Google!!!

        result = gbq.read_gbq("SELECT COUNT(*) as NUM_ROWS FROM {0}"
                              .format(destination_table),
                              project_id=_get_project_id(),
                              private_key=_get_private_key_path())
        self.assertEqual(result['NUM_ROWS'][0], test_size * 2)

        # Try inserting with a different schema, confirm failure
        with tm.assertRaises(gbq.InvalidSchema):
            gbq.to_gbq(df_different_schema, destination_table,
                       _get_project_id(), if_exists='append',
                       private_key=_get_private_key_path())
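
The `if_exists` values exercised here are 'fail' (the default, which raises if the table already exists), 'replace', and 'append', which requires the incoming schema to match, as the InvalidSchema check above verifies. A minimal sketch with placeholder table and project names:

    import pandas as pd
    from pandas.io import gbq

    df = pd.DataFrame({'a': [1, 2, 3]})
    # Placeholder destination table and project ID.
    gbq.to_gbq(df, "my_dataset.my_table", "my-project", if_exists='append')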
Code Example #9
def generate_analytics_cards(service, project_id, backup_date):
    # We generate the analytics cards from the ProblemLog table,
    # only if necessary. Note that this query is expensive, so
    # we only do this once (and subsequent queries can just look
    # at the analytics cards - much smaller).
    #
    # Let's look at the most recently done analytics card
    # and if it's close enough to the backup date, assume
    # that we don't need to regenerate the table.
    results = gbq.read_gbq('SELECT MAX(time_done) as max_time \n'
                           'FROM [tony.analytics_cards_1]',
                           project_id=project_id)
    usec = int(results['max_time'][0])
    ts = usec / 1000 / 1000
    last_ac_time = datetime.datetime.fromtimestamp(ts)
    last_date = datetime.datetime.strptime(backup_date, '%Y_%m_%d')
    gap = last_date - last_ac_time
    gap_in_days = gap.days
    print 'Analytics card table is', gap, 'old'
    if gap_in_days > 1:
        print 'Regenerating analytics_cards_1...'
        query_string = (
            'SELECT user_id, task_type, time_done, exercise, correct\n'
            'FROM [%s.ProblemLog]\n'
            'WHERE task_type = \'mastery.analytics\'\n'
        ) % backup_date
        run_query(service, project_id, query_string, 'analytics_cards_1')
    else:
        print 'Table is up to date. Skipping...'
Code Example #10
File: test_gbq.py Project: AlexisMignon/pandas
    def test_upload_data_if_table_exists_replace(self):

        raise nose.SkipTest("buggy test")

        destination_table = DESTINATION_TABLE + "4"

        test_size = 10
        df = make_mixed_dataframe_v2(test_size)
        df_different_schema = tm.makeMixedDataFrame()

        # Initialize table with sample data
        gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000,
                   private_key=_get_private_key_path())

        # Test the if_exists parameter with the value 'replace'.
        gbq.to_gbq(df_different_schema, destination_table,
                   _get_project_id(), if_exists='replace',
                   private_key=_get_private_key_path())

        sleep(30)  # <- Curses Google!!!

        result = gbq.read_gbq("SELECT COUNT(*) as NUM_ROWS FROM {0}"
                              .format(destination_table),
                              project_id=_get_project_id(),
                              private_key=_get_private_key_path())
        self.assertEqual(result['NUM_ROWS'][0], 5)
Code Example #11
File: test_gbq.py Project: BrenBarn/pandas
 def test_download_dataset_larger_than_200k_rows(self):
     test_size = 200005
     # Test for known BigQuery bug in datasets larger than 100k rows
     # http://stackoverflow.com/questions/19145587/bq-py-not-paging-results
     df = gbq.read_gbq("SELECT id FROM [publicdata:samples.wikipedia] GROUP EACH BY id ORDER BY id ASC LIMIT {0}".format(test_size),
                       project_id=PROJECT_ID)
     self.assertEqual(len(df.drop_duplicates()), test_size)
Code Example #12
 def test_index_column(self):
     query = "SELECT 'a' as STRING_1, 'b' as STRING_2"
     result_frame = gbq.read_gbq(
         query, project_id=PROJECT_ID, index_col="STRING_1")
     correct_frame = DataFrame(
         {'STRING_1': ['a'], 'STRING_2': ['b']}).set_index("STRING_1")
     tm.assert_equal(result_frame.index.name, correct_frame.index.name)
Code Example #13
File: test_gbq.py Project: AlexisMignon/pandas
 def test_should_properly_handle_arbitrary_timestamp(self):
     query = 'SELECT TIMESTAMP("2004-09-15 05:00:00") as VALID_TIMESTAMP'
     df = gbq.read_gbq(query, project_id=_get_project_id(),
                       private_key=_get_private_key_path())
     tm.assert_frame_equal(df, DataFrame({
         'VALID_TIMESTAMP': [np.datetime64('2004-09-15T05:00:00.000000Z')]
     }))
Code Example #14
File: test_gbq.py Project: orivej/pandas
 def test_should_read_as_service_account_with_key_contents(self):
     _skip_if_no_private_key_contents()
     query = 'SELECT "PI" as VALID_STRING'
     df = gbq.read_gbq(query,
                       project_id=_get_project_id(),
                       private_key=_get_private_key_contents())
     tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']}))
Code Example #15
File: test_gbq.py Project: Sakampavankumar/pandas-1
 def test_zero_rows(self):
     # Bug fix for https://github.com/pydata/pandas/issues/10273
     df = gbq.read_gbq(
         "SELECT title, language  FROM [publicdata:samples.wikipedia] where timestamp=-9999999",
         project_id=PROJECT_ID)
     expected_result = DataFrame(columns=['title', 'language'])
     self.assert_frame_equal(df, expected_result)
Code Example #16
File: test_gbq.py Project: Sakampavankumar/pandas-1
 def test_download_dataset_larger_than_200k_rows(self):
     # Test for known BigQuery bug in datasets larger than 100k rows
     # http://stackoverflow.com/questions/19145587/bq-py-not-paging-results
     df = gbq.read_gbq(
         "SELECT id FROM [publicdata:samples.wikipedia] GROUP EACH BY id ORDER BY id ASC LIMIT 200005",
         project_id=PROJECT_ID)
     self.assertEqual(len(df.drop_duplicates()), 200005)
Code Example #17
File: test_gbq.py Project: Sakampavankumar/pandas-1
    def test_unicode_string_conversion_and_normalization(self):
        correct_test_datatype = DataFrame({'UNICODE_STRING': [u("\xe9\xfc")]})

        query = 'SELECT "\xc3\xa9\xc3\xbc" as UNICODE_STRING'

        df = gbq.read_gbq(query, project_id=PROJECT_ID)
        tm.assert_frame_equal(df, correct_test_datatype)
Code Example #18
File: test_gbq.py Project: orivej/pandas
    def test_upload_data_if_table_exists_replace(self):

        raise nose.SkipTest("buggy test")

        destination_table = DESTINATION_TABLE + "4"

        test_size = 10
        df = make_mixed_dataframe_v2(test_size)
        df_different_schema = tm.makeMixedDataFrame()

        # Initialize table with sample data
        gbq.to_gbq(df,
                   destination_table,
                   _get_project_id(),
                   chunksize=10000,
                   private_key=_get_private_key_path())

        # Test the if_exists parameter with the value 'replace'.
        gbq.to_gbq(df_different_schema,
                   destination_table,
                   _get_project_id(),
                   if_exists='replace',
                   private_key=_get_private_key_path())

        sleep(30)  # <- Curses Google!!!

        result = gbq.read_gbq(
            "SELECT COUNT(*) as NUM_ROWS FROM {0}".format(destination_table),
            project_id=_get_project_id(),
            private_key=_get_private_key_path())
        self.assertEqual(result['NUM_ROWS'][0], 5)
Code Example #19
File: test_gbq.py Project: orivej/pandas
 def test_should_properly_handle_valid_floats(self):
     query = 'SELECT PI() as VALID_FLOAT'
     df = gbq.read_gbq(query,
                       project_id=_get_project_id(),
                       private_key=_get_private_key_path())
     tm.assert_frame_equal(df,
                           DataFrame({'VALID_FLOAT': [3.141592653589793]}))
Code Example #20
File: test_gbq.py Project: AlexisMignon/pandas
    def test_should_read_as_user_account(self):
        if _in_travis_environment():
            raise nose.SkipTest("Cannot run local auth in travis environment")

        query = 'SELECT "PI" as VALID_STRING'
        df = gbq.read_gbq(query, project_id=_get_project_id())
        tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']}))
Code Example #21
File: test_gbq.py Project: akloster/pandas
    def test_upload_replace(self):
        # Attempting to overwrite an existing table with valid data and a valid schema should succeed
        if not os.path.exists(self.bq_token):
            raise nose.SkipTest('Skipped because authentication information is not available.')

        schema = ['STRING', 'INTEGER', 'STRING', 'INTEGER', 'BOOLEAN',
                  'INTEGER', 'STRING', 'INTEGER',
                  'STRING', 'INTEGER', 'BOOLEAN', 'BOOLEAN',
                  'INTEGER', 'STRING', 'INTEGER']

        # Setup an existing table
        array1 = [['', 1, '', 1, False, 1, '00.111.00.111', 1, 'hola',
                 1, True, True, 1, 'Sith', 1]]
        df1 = DataFrame(array1, columns=['title','id','language','wp_namespace','is_redirect','revision_id',
                                       'contributor_ip','contributor_id','contributor_username','timestamp',
                                       'is_minor','is_bot','reversion_id','comment','num_characters'])
        gbq.to_gbq(df1, 'pandas_testing_dataset.test_data5', schema=schema, col_order=None, if_exists='fail')
        
        array2 = [['TESTING_GBQ', 999999999, 'hi', 0, True, 9999999999, '00.000.00.000', 1, 'hola',
                 99999999, False, False, 1, 'Jedi', 11210]]

        # Overwrite the existing table with different data
        df2 = DataFrame(array2, columns=['title','id','language','wp_namespace','is_redirect','revision_id',
                                       'contributor_ip','contributor_id','contributor_username','timestamp',
                                       'is_minor','is_bot','reversion_id','comment','num_characters'])
        gbq.to_gbq(df2, 'pandas_testing_dataset.test_data5', schema=schema, col_order=None, if_exists='replace')
        
        # Read the table and confirm the new data is all that is there
        a = gbq.read_gbq("SELECT * FROM pandas_testing_dataset.test_data5")
        self.assertTrue((a == df2).all().all())
Code Example #22
File: test_medication.py Project: ajb5d/mimic-code
def test_vasopressor_doses(dataset, project_id):
    # verify vasopressors have reasonable doses
    # based on uptodate graphic 99963 version 19.0
    # double the maximum dose used in refractory shock is the upper limit used
    itemids = {
        'milrinone': 221986,
        'dobutamine': 221653,
        'dopamine': 221662,
        'epinephrine': 221289,
        'norepinephrine': 221906,
        'phenylephrine': 221749,
        'vasopressin': 222315,
    }
    max_dose = {
        'milrinone': 1.5,
        'dobutamine': 40,
        'dopamine': 40,
        'epinephrine': 4,
        'norepinephrine': 6.6,
        'phenylephrine': 18.2,
        'vasopressin': 0.08,
    }

    for vaso, dose in max_dose.items():
        query = f"""
        select COUNT(vaso_rate) AS n_above_rate
        FROM mimic_derived.{vaso}
        WHERE vaso_rate >= {dose}
        """
        df = gbq.read_gbq(query, project_id=project_id, dialect="standard")
        n_above_rate = df.loc[0, 'n_above_rate']
        assert n_above_rate == 0, f'found {vaso} rows with dose above {dose}, potentially incorrect'
Code Example #23
    def test_upload_data_if_table_exists_append(self):
        destination_table = DESTINATION_TABLE + "3"

        test_size = 10
        df = make_mixed_dataframe_v2(test_size)
        df_different_schema = tm.makeMixedDataFrame()

        # Initialize table with sample data
        gbq.to_gbq(df, destination_table, PROJECT_ID, chunksize=10000)

        # Test the if_exists parameter with value 'append'
        gbq.to_gbq(df, destination_table, PROJECT_ID, if_exists='append')

        sleep(30)  # <- Curses Google!!!

        result = gbq.read_gbq(
            "SELECT COUNT(*) as NUM_ROWS FROM {0}".format(destination_table),
            project_id=PROJECT_ID)
        self.assertEqual(result['NUM_ROWS'][0], test_size * 2)

        # Try inserting with a different schema, confirm failure
        with tm.assertRaises(gbq.InvalidSchema):
            gbq.to_gbq(df_different_schema,
                       destination_table,
                       PROJECT_ID,
                       if_exists='append')
Code Example #24
 def test_should_read_as_service_account_with_key_contents(self):
     _skip_if_no_private_key_contents()
     query = 'SELECT "PI" as VALID_STRING'
     df = gbq.read_gbq(query,
                       project_id=PROJECT_ID,
                       private_key=PRIVATE_KEY_JSON_CONTENTS)
     tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']}))
Code Example #25
File: test_gbq.py Project: Alias4bb/pandas
 def test_index_column(self):
     query = "SELECT 'a' as STRING_1, 'b' as STRING_2"
     result_frame = gbq.read_gbq(
         query, project_id=PROJECT_ID, index_col="STRING_1")
     correct_frame = DataFrame(
         {'STRING_1': ['a'], 'STRING_2': ['b']}).set_index("STRING_1")
     tm.assert_equal(result_frame.index.name, correct_frame.index.name)
Code Example #26
File: test_gbq.py Project: cscanlin/pandas
 def test_column_order_plus_index(self):
     query = "SELECT 'a' as STRING_1, 'b' as STRING_2, 'c' as STRING_3"
     col_order = ["STRING_3", "STRING_2"]
     result_frame = gbq.read_gbq(query, project_id=PROJECT_ID, index_col="STRING_1", col_order=col_order)
     correct_frame = DataFrame({"STRING_1": ["a"], "STRING_2": ["b"], "STRING_3": ["c"]})
     correct_frame.set_index("STRING_1", inplace=True)
     correct_frame = correct_frame[col_order]
     tm.assert_frame_equal(result_frame, correct_frame)
Code Example #27
def flag_status():
    # Fetch Data from Big Query Table
    query = '''select count(*) from airflow.covid '''
    result = gbq.read_gbq(query, project_id='airflow29thmay-08')
    print(result)
    data = pd.read_csv('/home/nineleaps/PycharmProjects/air/file1.csv')
    total_rows = len(data.axes[0])
    print(total_rows)
Code Example #28
File: test_gbq.py Project: cscanlin/pandas
 def test_zero_rows(self):
     # Bug fix for https://github.com/pydata/pandas/issues/10273
     df = gbq.read_gbq(
         "SELECT title, language  FROM " "[publicdata:samples.wikipedia] where " "timestamp=-9999999",
         project_id=PROJECT_ID,
     )
     expected_result = DataFrame(columns=["title", "language"])
     self.assert_frame_equal(df, expected_result)
Code Example #29
File: test_gbq.py Project: mcooney9790/QueryPractice
 def test_column_order_plus_index(self):
     query = "SELECT 'a' as STRING_1, 'b' as STRING_2, 'c' as STRING_3"
     col_order = ['STRING_3', 'STRING_2']
     result_frame = gbq.read_gbq(query, project_id=PROJECT_ID, index_col='STRING_1', col_order=col_order)
     correct_frame = DataFrame({'STRING_1': ['a'], 'STRING_2': ['b'], 'STRING_3': ['c']})
     correct_frame.set_index('STRING_1', inplace=True)
     correct_frame = correct_frame[col_order]
     tm.assert_frame_equal(result_frame, correct_frame)
Code Example #30
File: test_gbq.py Project: yizhiyong/pandas
 def test_column_order(self):
     query = "SELECT 'a' as STRING_1, 'b' as STRING_2, 'c' as STRING_3"
     col_order = ['STRING_3', 'STRING_1', 'STRING_2']
     result_frame = gbq.read_gbq(
         query, project_id=PROJECT_ID, col_order=col_order)
     correct_frame = DataFrame({'STRING_1': ['a'], 'STRING_2': [
                               'b'], 'STRING_3': ['c']})[col_order]
     tm.assert_frame_equal(result_frame, correct_frame)
Code Example #31
 def test_index_column(self):
     query = "SELECT 'a' as STRING_1, 'b' as STRING_2"
     result_frame = gbq.read_gbq(query, project_id=_get_project_id(),
                                 index_col="STRING_1",
                                 private_key=_get_private_key_path())
     correct_frame = DataFrame(
         {'STRING_1': ['a'], 'STRING_2': ['b']}).set_index("STRING_1")
     tm.assert_equal(result_frame.index.name, correct_frame.index.name)
Code Example #32
File: test_gbq.py Project: ARF1/pandas
 def test_column_order_plus_index(self):
     query = "SELECT 'a' as STRING_1, 'b' as STRING_2, 'c' as STRING_3"
     col_order = ['STRING_3', 'STRING_2']
     result_frame = gbq.read_gbq(query, project_id=PROJECT_ID, index_col='STRING_1', col_order=col_order)
     correct_frame = DataFrame({'STRING_1' : ['a'], 'STRING_2' : ['b'], 'STRING_3' : ['c']})
     correct_frame.set_index('STRING_1', inplace=True)
     correct_frame = correct_frame[col_order]
     tm.assert_frame_equal(result_frame, correct_frame)
Code Example #33
File: test_gbq.py Project: xpnguyen/pandas
    def test_invalid_option_for_sql_dialect(self):
        sql_statement = "SELECT DISTINCT id FROM " \
                        "`publicdata.samples.wikipedia` LIMIT 10"

        # Test that an invalid option for `dialect` raises ValueError
        with tm.assertRaises(ValueError):
            gbq.read_gbq(sql_statement,
                         project_id=_get_project_id(),
                         dialect='invalid',
                         private_key=_get_private_key_path())

        # Test that a correct option for dialect succeeds
        # to make sure ValueError was due to invalid dialect
        gbq.read_gbq(sql_statement,
                     project_id=_get_project_id(),
                     dialect='standard',
                     private_key=_get_private_key_path())
Code Example #34
 def test_should_properly_handle_timestamp_unix_epoch(self):
     query = 'SELECT TIMESTAMP("1970-01-01 00:00:00") as UNIX_EPOCH'
     df = gbq.read_gbq(query, project_id=PROJECT_ID)
     tm.assert_frame_equal(
         df,
         DataFrame(
             {'UNIX_EPOCH':
              [np.datetime64('1970-01-01T00:00:00.000000Z')]}))
Code Example #35
    def load_training_data_from_pubs(self, training_publications_df):
        tmp_table = 'patents._tmp_training'
        self.load_df_to_bq_tmp(df=training_publications_df,
                               tmp_table=tmp_table)

        training_data_query = '''
            SELECT DISTINCT
                REGEXP_EXTRACT(LOWER(p.publication_number), r'[a-z]+-(\d+)-[a-z0-9]+') as pub_num,
                p.publication_number,
                p.family_id,
                p.priority_date,
                title.text as title_text,
                abstract.text as abstract_text,
                'unused' as claims_text,
                --SUBSTR(claims.text, 0, 5000) as claims_text,
                'unused' as description_text,
                --SUBSTR(description.text, 0, 5000) as description_text,
                STRING_AGG(citations.publication_number) AS refs,
                STRING_AGG(cpcs.code) AS cpcs
            FROM
              `patents-public-data.patents.publications` p,
              `{}` as tmp,
              UNNEST(p.title_localized) AS title,
              UNNEST(p.abstract_localized) AS abstract,
              UNNEST(p.claims_localized) AS claims,
              UNNEST(p.description_localized) AS description,
              UNNEST(p.title_localized) AS title_lang,
              UNNEST(p.abstract_localized) AS abstract_lang,
              UNNEST(p.claims_localized) AS claims_lang,
              UNNEST(p.description_localized) AS description_lang,
              UNNEST(citation) AS citations,
              UNNEST(cpc) AS cpcs
            WHERE
                p.publication_number = tmp.publication_number
                AND country_code = 'US'
                AND title_lang.language = 'en'
                AND abstract_lang.language = 'en'
                AND claims_lang.language = 'en'
                AND description_lang.language = 'en'
            GROUP BY p.publication_number, p.family_id, p.priority_date, title.text,
                        abstract.text, claims.text, description.text
            ;
        '''.format(tmp_table)

        print('Loading patent texts from provided publication numbers.')
        #print('Training data query:\n{}'.format(training_data_query))
        training_data_df = gbq.read_gbq(query=training_data_query,
                                        project_id=self.bq_project,
                                        verbose=False,
                                        dialect='standard',
                                        configuration={
                                            'query': {
                                                'useQueryCache': True,
                                                'allowLargeResults': False
                                            }
                                        })

        return training_data_df
Code Example #36
 def test_should_properly_handle_arbitrary_timestamp(self):
     query = 'SELECT TIMESTAMP("2004-09-15 05:00:00") as VALID_TIMESTAMP'
     df = gbq.read_gbq(query, project_id=PROJECT_ID)
     tm.assert_frame_equal(
         df,
         DataFrame({
             'VALID_TIMESTAMP':
             [np.datetime64('2004-09-15T05:00:00.000000Z')]
         }))
Code Example #37
File: test_gbq.py Project: ARF1/pandas
    def test_unicode_string_conversion_and_normalization(self):
        correct_test_datatype = DataFrame(
            {'UNICODE_STRING' : [u("\xe9\xfc")]}
        )

        query = 'SELECT "\xc3\xa9\xc3\xbc" as UNICODE_STRING'

        df = gbq.read_gbq(query, project_id=PROJECT_ID)
        tm.assert_frame_equal(df, correct_test_datatype)
Code Example #38
 def test_column_order(self):
     query = "SELECT 'a' as STRING_1, 'b' as STRING_2, 'c' as STRING_3"
     col_order = ['STRING_3', 'STRING_1', 'STRING_2']
     result_frame = gbq.read_gbq(query, project_id=_get_project_id(),
                                 col_order=col_order,
                                 private_key=_get_private_key_path())
     correct_frame = DataFrame({'STRING_1': ['a'], 'STRING_2': [
                               'b'], 'STRING_3': ['c']})[col_order]
     tm.assert_frame_equal(result_frame, correct_frame)
Code Example #39
File: test_gbq.py Project: xpnguyen/pandas
    def test_standard_sql(self):
        standard_sql = "SELECT DISTINCT id FROM " \
                       "`publicdata.samples.wikipedia` LIMIT 10"

        # Test that a standard sql statement fails when using
        # the legacy SQL dialect (default value)
        with tm.assertRaises(gbq.GenericGBQException):
            gbq.read_gbq(standard_sql,
                         project_id=_get_project_id(),
                         private_key=_get_private_key_path())

        # Test that a standard sql statement succeeds when
        # setting dialect='standard'
        df = gbq.read_gbq(standard_sql,
                          project_id=_get_project_id(),
                          dialect='standard',
                          private_key=_get_private_key_path())
        self.assertEqual(len(df.drop_duplicates()), 10)
Code Example #40
File: test_gbq.py Project: xpnguyen/pandas
    def test_legacy_sql(self):
        legacy_sql = "SELECT id FROM [publicdata.samples.wikipedia] LIMIT 10"

        # Test that a legacy sql statement fails when
        # setting dialect='standard'
        with tm.assertRaises(gbq.GenericGBQException):
            gbq.read_gbq(legacy_sql,
                         project_id=_get_project_id(),
                         dialect='standard',
                         private_key=_get_private_key_path())

        # Test that a legacy sql statement succeeds when
        # setting dialect='legacy'
        df = gbq.read_gbq(legacy_sql,
                          project_id=_get_project_id(),
                          dialect='legacy',
                          private_key=_get_private_key_path())
        self.assertEqual(len(df.drop_duplicates()), 10)
Code Example #41
File: test_gbq.py Project: Barneyjm/pandas
    def test_download_dataset_larger_than_100k_rows(self):
        # Test for known BigQuery bug in datasets larger than 100k rows
        # http://stackoverflow.com/questions/19145587/bq-py-not-paging-results
        if not os.path.exists(self.bq_token):
            raise nose.SkipTest('Skipped because authentication information is not available.')

        client = gbq._authenticate()
        a = gbq.read_gbq("SELECT id, FROM [publicdata:samples.wikipedia] LIMIT 100005")
        self.assertTrue(len(a) == 100005)
Code Example #42
    def expand_l1(self, cpc_codes_series, refs_series):
        self.load_df_to_bq_tmp(pd.DataFrame(refs_series, columns=['pub_num']),
                               self.l1_tmp_table)

        cpc_where_clause = ",".join("'" + cpc_codes_series + "'")

        expansion_query = '''
            SELECT DISTINCT publication_number, ExpansionLevel, refs
            FROM
            (
            SELECT
              b.publication_number,
              'L1' as ExpansionLevel,
              STRING_AGG(citations.publication_number) AS refs
            FROM
              `patents-public-data.patents.publications` AS b,
              UNNEST(citation) AS citations,
              UNNEST(cpc) AS cpcs
            WHERE
            (
                cpcs.code IN
                (
                {}
                )
            )
            AND citations.publication_number != ''
            AND country_code IN ('US')
            GROUP BY b.publication_number

            UNION ALL

            SELECT
              b.publication_number,
              'L1' as ExpansionLevel,
              STRING_AGG(citations.publication_number) AS refs
            FROM
              `patents-public-data.patents.publications` AS b,
              `{}` as tmp,
              UNNEST(citation) AS citations
            WHERE
            (
                b.publication_number = tmp.pub_num
            )
            AND citations.publication_number != ''
            GROUP BY b.publication_number
            )
            ;
        '''.format(cpc_where_clause, self.l1_tmp_table)

        #print(expansion_query)
        expansion_df = gbq.read_gbq(query=expansion_query,
                                    project_id=self.bq_project,
                                    verbose=False,
                                    dialect='standard')

        return expansion_df
Code Example #43
    def main(project_id, batch, num_retries, interval, use_legacy_sql):
        # [START build_service]
        # Construct the service object for interacting with the BigQuery API.
        bigquery = googleapiclient.discovery.build('bigquery', 'v2')
        # [END build_service]

        #query_string = "SELECT domain, date FROM (SELECT domain, date, bundle, SUM(available_inventory) AS auctionss, SUM(demand_side_revenue) AS demand_side_revenue FROM TABLE_DATE_RANGE(reporting_hudson_views.stats_daily_, TIMESTAMP('2018-03-01'), TIMESTAMP('2018-03-25')) GROUP BY date, domain, bundle ) GROUP BY domain, bundle, date HAVING bundle IS NULL order by demand_side_revenue DESC"
        # Submit the job and wait for it to complete.
        use_legacy_sql
        query_job = async_query(bigquery, project_id, query_string, batch,
                                num_retries, use_legacy_sql)
        df3 = pd.read_csv("AllDevice.csv")
        df = gbq.read_gbq(query_string, project_id=project_id)
        print(df.shape)
        frames = [df, df3]
        df5 = pd.concat(frames)
        print(df5.shape)
        df5.to_csv("AllDevice.csv", index=False)

        df4 = df5["device_id"]

        # `filename` is assumed to be defined at module level.
        df4.to_csv(filename, header=False, index=False)
        """myFTP = ftplib.FTP("ec2-204-236-207-77.compute-1.amazonaws.com", "kunal", "kunal123")
        ec2-204-236-207-77.compute-1.amazonaws.comkunalkunal@123

        myFTP.set_pasv(False)
        file=open(filename,"rb")
        filename1='STOR ' + filename
        myFTP.storbinary(filename1,file)"""

        # `helper_config` is likewise assumed to be loaded elsewhere.
        host = helper_config['ftp']['host']
        username = helper_config['ftp']['user_name']
        password = helper_config['ftp']['password']

        print(host + username + password)

        myFTP = ftplib.FTP(host, username, password)
        myFTP.set_pasv(False)
        file = open(filename, "rb")
        filename1 = 'STOR ' + filename
        myFTP.storbinary(filename1, file)

        poll_job(bigquery, query_job)

        # Page through the result set and print all results.
        page_token = None
        while True:
            page = bigquery.jobs().getQueryResults(
                pageToken=page_token,
                **query_job['jobReference']).execute(num_retries=2)

            page_token = page.get('pageToken')
            if not page_token:
                break
Code Example #44
File: bqutils.py Project: vincnt/Lambo
def bqtopd():
    query = """
            SELECT CMC_ID, CC_USD_PRICE, Timestamp
            FROM Market_Fetch.raw_prices
            WHERE CMC_ID = 'revain'
            ORDER BY Timestamp DESC
            LIMIT 10;
        """
    data_frame = gbq.read_gbq(query, "lambo-192519")
    print(data_frame.head())
Code Example #45
 def test_download_dataset_larger_than_200k_rows(self):
     test_size = 200005
     # Test for known BigQuery bug in datasets larger than 100k rows
     # http://stackoverflow.com/questions/19145587/bq-py-not-paging-results
     df = gbq.read_gbq("SELECT id FROM [publicdata:samples.wikipedia] "
                       "GROUP EACH BY id ORDER BY id ASC LIMIT {0}"
                       .format(test_size),
                       project_id=_get_project_id(),
                       private_key=_get_private_key_path())
     self.assertEqual(len(df.drop_duplicates()), test_size)
Code Example #46
def conexao_bq(select):
    # Point to the JSON credentials file exported from GCP
    credentials = service_account.Credentials.from_service_account_file(
        'C:\\Users\\bruno\\OneDrive\\BigQUery\\BigQuery-d49b6d4c827c.json')
    # Run the query with the BQ credentials and the project ID
    df = gbq.read_gbq(select,
                      project_id='bigquery-194320',
                      credentials=credentials)
    # Return the query result
    return df
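
A hypothetical call to the helper above, assuming the credentials file exists; the dataset and table names are illustrative:

    df = conexao_bq("SELECT COUNT(*) AS n FROM my_dataset.my_table")
    print(df['n'][0])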
Code Example #47
File: test_gbq.py Project: yizhiyong/pandas
 def test_zero_rows(self):
     # Bug fix for https://github.com/pydata/pandas/issues/10273
     df = gbq.read_gbq("SELECT title, id "
                       "FROM [publicdata:samples.wikipedia] "
                       "WHERE timestamp=-9999999",
                       project_id=PROJECT_ID)
     page_array = np.zeros(
         (0,), dtype=[('title', object), ('id', np.dtype(float))])
     expected_result = DataFrame(page_array, columns=['title', 'id'])
     self.assert_frame_equal(df, expected_result)
Code Example #48
def get_row_count_from_table():
    try:
        sql = 'select count(*) as total_rows from Covid19.statewise_daily_cases'
        total_rows_df = gbq.read_gbq(query=sql,
                                     project_id=project_id,
                                     credentials=credentials)
        return total_rows_df['total_rows'][0]
    except GenericGBQException as e:
        logging.error("Exception: " + str(e))
        raise
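
This helper relies on module-level `project_id` and `credentials` globals and on `GenericGBQException` being imported. A sketch, with placeholder values, of how that surrounding module might look:

    import logging

    from google.oauth2 import service_account
    from pandas.io import gbq
    from pandas_gbq.gbq import GenericGBQException

    # Placeholder key file path and project ID.
    credentials = service_account.Credentials.from_service_account_file(
        'service-account.json')
    project_id = 'my-project'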
Code Example #49
File: test_gbq.py Project: AlexisMignon/pandas
    def test_query_inside_configuration(self):
        query_no_use = 'SELECT "PI_WRONG" as VALID_STRING'
        query = 'SELECT "PI" as VALID_STRING'
        config = {
            'query': {
                "query": query,
                "useQueryCache": False,
            }
        }
        # Test that a query cannot be passed both
        # inside the configuration and as a parameter
        with tm.assertRaises(ValueError):
            gbq.read_gbq(query_no_use, project_id=_get_project_id(),
                         private_key=_get_private_key_path(),
                         configuration=config)

        df = gbq.read_gbq(None, project_id=_get_project_id(),
                          private_key=_get_private_key_path(),
                          configuration=config)
        tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']}))
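
The same pattern in ordinary use, mirroring the test above: the SQL lives entirely in the configuration dict and the query argument is None. The project ID is a placeholder:

    from pandas.io import gbq

    config = {'query': {'query': 'SELECT 1 AS x', 'useQueryCache': False}}
    # Pass None as the SQL string; the statement comes from `config`.
    df = gbq.read_gbq(None, project_id='my-project', configuration=config)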
Code Example #50
File: test_gbq.py Project: nicku33/pandas
    def test_upload_data(self):
        table_name = 'new_test1'

        test_size = 1000001
        df = make_mixed_dataframe_v2(test_size)

        gbq.to_gbq(df, "pydata_pandas_bq_testing." + table_name, PROJECT_ID, chunksize=10000)

        sleep(60)  # <- Curses Google!!!

        result = gbq.read_gbq("SELECT COUNT(*) as NUM_ROWS FROM pydata_pandas_bq_testing." + table_name, project_id=PROJECT_ID)
        self.assertEqual(result['NUM_ROWS'][0], test_size)
Code Example #51
File: test_gbq.py Project: cscanlin/pandas
    def test_upload_data(self):
        destination_table = DESTINATION_TABLE + "1"

        test_size = 1000001
        df = make_mixed_dataframe_v2(test_size)

        gbq.to_gbq(df, destination_table, PROJECT_ID, chunksize=10000)

        sleep(60)  # <- Curses Google!!!

        result = gbq.read_gbq("SELECT COUNT(*) as NUM_ROWS FROM {0}".format(destination_table), project_id=PROJECT_ID)
        self.assertEqual(result["NUM_ROWS"][0], test_size)
Code Example #52
File: test_gbq.py Project: cscanlin/pandas
    def test_unicode_string_conversion_and_normalization(self):
        correct_test_datatype = DataFrame({"UNICODE_STRING": [u("\xe9\xfc")]})

        unicode_string = "\xc3\xa9\xc3\xbc"

        if compat.PY3:
            unicode_string = unicode_string.encode("latin-1").decode("utf8")

        query = 'SELECT "{0}" as UNICODE_STRING'.format(unicode_string)

        df = gbq.read_gbq(query, project_id=PROJECT_ID)
        tm.assert_frame_equal(df, correct_test_datatype)
Code Example #53
def pandas_get_table(dTable):
    "fetch a table and return dataframe"
    from pandas.io import gbq
    
    sProjectID = dTable['projectId']
    sDatasetID = dTable['datasetId']
    sTableID = dTable['tableId']
    sQuery = "SELECT * FROM [{}.{}]".format(sDatasetID, sTableID)
    
    df = gbq.read_gbq(sQuery, sProjectID)
    
    return df 
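
A hypothetical table descriptor for the helper above; all three IDs are placeholders:

    dTable = {'projectId': 'my-project',
              'datasetId': 'samples',
              'tableId': 'wikipedia'}
    df = pandas_get_table(dTable)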
Code Example #54
File: test_gbq.py Project: AlexisMignon/pandas
    def test_query_with_parameters(self):
        sql_statement = "SELECT @param1 + @param2 as VALID_RESULT"
        config = {
            'query': {
                "useLegacySql": False,
                "parameterMode": "named",
                "queryParameters": [
                    {
                        "name": "param1",
                        "parameterType": {
                            "type": "INTEGER"
                        },
                        "parameterValue": {
                            "value": 1
                        }
                    },
                    {
                        "name": "param2",
                        "parameterType": {
                            "type": "INTEGER"
                        },
                        "parameterValue": {
                            "value": 2
                        }
                    }
                ]
            }
        }
        # Test that a query that relies on parameters fails
        # when parameters are not supplied via configuration
        with tm.assertRaises(ValueError):
            gbq.read_gbq(sql_statement, project_id=_get_project_id(),
                         private_key=_get_private_key_path())

        # Test that the query is successful because we have supplied
        # the correct query parameters via the 'config' option
        df = gbq.read_gbq(sql_statement, project_id=_get_project_id(),
                          private_key=_get_private_key_path(),
                          configuration=config)
        tm.assert_frame_equal(df, DataFrame({'VALID_RESULT': [3]}))
Code Example #55
File: test_gbq.py Project: AlexisMignon/pandas
 def test_configuration_without_query(self):
     sql_statement = 'SELECT 1'
     config = {
         'copy': {
             "sourceTable": {
                 "projectId": _get_project_id(),
                 "datasetId": "publicdata:samples",
                 "tableId": "wikipedia"
             },
             "destinationTable": {
                 "projectId": _get_project_id(),
                 "datasetId": "publicdata:samples",
                 "tableId": "wikipedia_copied"
             },
         }
     }
     # Test that only 'query' configurations are supported,
     # not 'copy', 'load' or 'extract'
     with tm.assertRaises(ValueError):
         gbq.read_gbq(sql_statement, project_id=_get_project_id(),
                      private_key=_get_private_key_path(),
                      configuration=config)
Code Example #56
File: test_gbq.py Project: ARF1/pandas
    def test_upload_data(self):
        test_size = 1000001
        # create df to test for all BQ datatypes except RECORD
        bools = np.random.randint(2, size=(1, test_size)).astype(bool)
        flts = np.random.randn(1, test_size)
        ints = np.random.randint(1, 10, size=(1, test_size))
        strs = np.random.randint(1, 10, size=(1, test_size)).astype(str)
        times = [datetime.datetime.now(pytz.timezone('US/Arizona'))
                 for t in xrange(test_size)]
        df = DataFrame({'bools': bools[0], 'flts': flts[0], 'ints': ints[0],
                        'strs': strs[0], 'times': times[0]},
                       index=range(test_size))
        gbq.to_gbq(df, "pydata_pandas_bq_testing.new_test",
                   project_id=PROJECT_ID, chunksize=10000)
        sleep(60)  # <- Curses Google!!!

        result = gbq.read_gbq("SELECT COUNT(*) as NUM_ROWS FROM pydata_pandas_bq_testing.new_test", project_id=PROJECT_ID)
        self.assertEqual(result['NUM_ROWS'][0], test_size)