def test_user_creation_validation_error(self):
    # The email field is given an integer, which should fail GraphQL validation.
    query = """
    mutation myFirstMutation {
        createUser(email: 178,
                   firstName: "Joe",
                   lastName: "Doe") {
            user {
                id
            }
        }
    }
    """
    with self.assertRaises(GraphQLError):
        run_query(schema, query)
def fetch_data(test_name_prefix):
    job_status_dataframe = utils.run_query(
        JOB_STATUS_QUERY,
        cache_key=('job-status-%s' % test_name_prefix),
        config=_get_query_config(test_name_prefix))
    metrics_dataframe = utils.run_query(
        METRIC_STATUS_QUERY,
        cache_key=('metric-status-%s' % test_name_prefix),
        config=_get_query_config(test_name_prefix))
    combined_dataframe = process_dataframes(job_status_dataframe,
                                            metrics_dataframe)
    return combined_dataframe
def update(config, symbol):
    conn_cred = config['conn_cred']
    app = TestApp("127.0.0.1", 7497, 1)

    ibcontract = IBcontract()
    ibcontract.secType = "STK"
    # ibcontract.lastTradeDateOrContractMonth = "202011"
    ibcontract.symbol = symbol
    ibcontract.exchange = "SMART"
    ibcontract.currency = "USD"
    ibcontract.primaryExchange = "NASDAQ"
    resolved_ibcontract = app.resolve_ib_contract(ibcontract)

    historic_data = app.get_IB_historical_data(resolved_ibcontract,
                                               durationStr="1 W",
                                               barSizeSetting="5 secs")
    print('pulled historical data. converting data to something mysql expects')
    df = pd.DataFrame(
        historic_data,
        columns=['datetime', 'open', 'high', 'low', 'close', 'volume'])
    df['symbol'] = symbol
    df['datetime'] = pd.to_datetime(df['datetime'], format='%Y%m%d %H:%M:%S')
    # Seconds since the Unix epoch, shifted by 480 minutes (a fixed 8-hour
    # timezone offset).
    df['epoch'] = (df['datetime'] -
                   datetime(1970, 1, 1)).dt.total_seconds() + (480 * 60)
    list_vals = df[[
        'symbol', 'epoch', 'open', 'high', 'low', 'close', 'volume'
    ]].values.tolist()

    print('inserting to sql database')
    # Robust one-by-one insertion (slow but simple).
    for row in list_vals:
        query = ("INSERT INTO {dbname}.bar_data "
                 "(symbol, epoch, open, high, low, close, volume) "
                 "VALUES ({csv})").format(
                     dbname=conn_cred['dbname'],
                     csv=','.join("'" + str(x) + "'" for x in row))
        run_query(conn_cred, query)
    # executemany() is supposed to be much faster; a hedged sketch follows
    # this function.
    print('done updating')
    quit()
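# A minimal executemany() sketch of the batched insert that the row loop
# above performs one statement at a time. This is an untested sketch, not the
# author's code: it assumes mysql_conn(dbname) returns a (connection, cursor)
# pair as used elsewhere in this module, and that list_vals holds 7-item rows
# in the column order below.
def insert_bars_executemany(conn_cred, list_vals):
    query = ("INSERT INTO {dbname}.bar_data "
             "(symbol, epoch, open, high, low, close, volume) "
             "VALUES (%s, %s, %s, %s, %s, %s, %s)").format(
                 dbname=conn_cred['dbname'])
    dbconn, cursor = mysql_conn(conn_cred['dbname'])
    # The driver substitutes one %s per column and handles quoting itself,
    # so the manual string escaping of the row loop is unnecessary here.
    cursor.executemany(query, [tuple(row) for row in list_vals])
    dbconn.commit()
    cursor.close()
    dbconn.close()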
def fetch_data(test_name_prefix):
    dataframe = utils.run_query(
        JOB_STATUS_QUERY,
        cache_key=('job-status-%s' % test_name_prefix),
        config=_get_query_config(test_name_prefix))
    metrics_dataframe = utils.run_query(
        METRIC_STATUS_QUERY,
        cache_key=('metric-status-%s' % test_name_prefix),
        config=_get_query_config(test_name_prefix))

    # Collect all test+date combinations where metrics were out of bounds.
    oob_tests = collections.defaultdict(list)

    def _test_date_key(test, date):
        return '{}:{}'.format(test, date)

    for row in metrics_dataframe.iterrows():
        oob_test_name = row[1]['test_name']
        oob_run_date = row[1]['run_date']
        oob_metric_name = row[1]['metric_name']
        oob_upper_bound = row[1]['metric_upper_bound']
        oob_lower_bound = row[1]['metric_lower_bound']
        failure_explanation = (
            f'Metric `{oob_metric_name}` was outside expected bounds of: '
            f'({oob_lower_bound}, {oob_upper_bound})')
        oob_tests[_test_date_key(oob_test_name, oob_run_date)].append(
            failure_explanation)

    # Start from the job status; downgrade to 'failure' below if any metric
    # was out of bounds.
    dataframe['overall_status'] = dataframe['job_status'].copy()

    # Record the status of the metrics for every test (one fresh empty list
    # per row).
    dataframe['failed_metrics'] = dataframe['job_status'].apply(lambda x: [])
    for row in dataframe.iterrows():
        test_name = row[1]['test_name']
        failed_metrics = oob_tests.get(
            _test_date_key(test_name, row[1]['run_date'])) or []
        if failed_metrics:
            # Use .at to avoid chained-assignment pitfalls.
            dataframe.at[row[0], 'failed_metrics'] = failed_metrics
            dataframe.at[row[0], 'overall_status'] = 'failure'

    # Create a few convenience columns to use in the dashboard.
    dataframe['job_status_abbrev'] = dataframe['overall_status'].apply(
        lambda x: '' if x.startswith('success') else x[:1].upper())
    dataframe['metrics_link'] = dataframe['test_name'].apply(
        lambda x: 'metrics?test_name={}'.format(x))
    dataframe['logs_download_command'] = dataframe['logs_link'].apply(
        utils.get_download_command)
    return dataframe
def get_youngest_persons_name():
    # Youngest person = most recent date of birth, hence func.max() rather
    # than func.min().
    max_dob = select(
        [func.max(tables.Person.c.date_of_birth).label('date_of_birth')])\
        .alias('some_alias')
    stmt = select([tables.Person.c.name]).select_from(
        tables.Person.join(
            max_dob,
            tables.Person.c.date_of_birth == max_dob.c.date_of_birth))
    return utils.run_query(stmt, engine)
def predict_consumers():
    consumer_product_records = run_query(
        "select ConsumerProductID, ConsumerID, DateOfPurchase from consumer_product")
    consumer_product_df = conv_to_df(consumer_product_records)
    filtered_df = preprocess_data(consumer_product_df)

    # Train/test split: all days of March, April and May 2018 form the test
    # set; everything earlier is the training set.
    train = filtered_df.loc[:'2018-02-28']
    test = filtered_df.loc['2018-03-01':]

    y_hat_avg = test.copy()
    fit1 = sm.tsa.statespace.SARIMAX(train.Count,
                                     order=(2, 1, 4),
                                     seasonal_order=(0, 1, 1, 7)).fit()
    y_hat_avg['SARIMA'] = fit1.forecast(len(test))

    # Evaluate the SARIMA forecast against the held-out counts.
    rms = sqrt(mean_squared_error(test.Count, y_hat_avg.SARIMA))
    print("RMSE is {}".format(rms))

    predicted_daily = fit1.predict(start="2018-06-01",
                                   end="2018-08-31",
                                   dynamic=True)
    predicted_monthly = predicted_daily.resample('M').sum()
    print("Predicted Number of Consumers in June 2018 = {}".format(
        int(predicted_monthly.loc['2018-06'])))
    print("Predicted Number of Consumers in July 2018 = {}".format(
        int(predicted_monthly.loc['2018-07'])))
    print("Predicted Number of Consumers in August 2018 = {}".format(
        int(predicted_monthly.loc['2018-08'])))
def add_selibr_from_viaf(viaf_file, upload):
    """
    1. Identify VIAF posts that have a SELIBR identifier.
    2. Check whether there is a Wikidata item with this VIAF.
    3. Check that the item does not already have a SELIBR claim.
    4. Add the SELIBR identifier to those items.
    """
    edit_summary = make_editgroups_summary("Adding SELIBR based on VIAF")
    with open(viaf_file) as myfile:
        for line in myfile:
            cleanline = is_selibr_line(line)
            if cleanline:
                viaf = cleanline[0]
                selibr = cleanline[1]
                onwikidata = utils.run_query(
                    "get_item_with_property_value.rq",
                    (PROPERTIES["viaf"], viaf))
                if not onwikidata:
                    continue
                q = onwikidata[0]["item"]
                if utils.is_human(q):
                    selibr_value = utils.get_claim(q, PROPERTIES["selibr"])
                    if not selibr_value:
                        print(q)
                        if upload:
                            utils.wd_add_unique_claim(
                                q,
                                {"prop": PROPERTIES["selibr"],
                                 "value": selibr},
                                edit_summary)
def fetch_data(self, state):
    dataframe = run_query(
        QUERY % {'state': NAMES_TO_CODES[state]},
        cache_key=('population-%s' % NAMES_TO_CODES[state]),
        dialect='standard')
    dataframe.index = np.arange(1, len(dataframe) + 1)
    return dataframe
def get_youngest_persons_name2():
    # Rank by date_of_birth descending so that row 1 is the most recent
    # birth date, i.e. the youngest person.
    ordered_age = select(
        [tables.Person.c.name,
         func.row_number().over(
             order_by=tables.Person.c.date_of_birth.desc()).label('row_no')])\
        .alias('some_alias')
    stmt = select([ordered_age.c.name]).where(ordered_age.c.row_no == 1)
    return utils.run_query(stmt, engine)
def fetch_data(test_names, metric_names):
    if not test_names or not metric_names:
        raise ValueError('Neither test_names nor metric_names can be empty.')
    dataframe = utils.run_query(
        get_query(test_names, metric_names),
        cache_key=('metrics-{}-{}'.format(str(test_names), str(metric_names))),
        config=get_query_config(test_names, metric_names))
    return dataframe
def fetch_data(self, state):
    dataframe = run_query(QUERY,
                          cache_key=('air-%s' % NAMES_TO_CODES[state]))
    dataframe['timestamp'] = pd.to_datetime(dataframe['timestamp'])
    dataframe['day'] = dataframe.timestamp.apply(lambda x: x.day)
    dataframe['minutes'] = dataframe.timestamp.apply(lambda x: x.minute)
    dataframe['hour'] = dataframe.timestamp.apply(lambda x: x.hour)
    return dataframe
def fetch_data(test_name):
    dataframe = utils.run_query(
        QUERY,
        cache_key=('metrics-%s' % test_name),
        config=_get_query_config(test_name))
    dataframe['logs_download_command'] = dataframe['logs_link'].apply(
        utils.get_download_command)
    return dataframe
def fetch_data(self, state):
    query = QUERY % {'state': NAMES_TO_CODES[state], 'year': YEAR}
    dataframe = run_query(query,
                          cache_key='temperature-%s' % NAMES_TO_CODES[state])
    dataframe['date'] = pd.to_datetime(dataframe[['year', 'month', 'day']])
    dataframe['date_readable'] = dataframe['date'].apply(
        lambda x: x.strftime("%Y-%m-%d"))
    # Half-day bounds around each date, e.g. for plotting daily bars.
    dataframe['left'] = dataframe.date - pd.Timedelta(hours=12)
    dataframe['right'] = dataframe.date + pd.Timedelta(hours=12)
    dataframe = dataframe.set_index(['date'])
    dataframe.sort_index(inplace=True)
    return dataframe
def get_writer_count_per_book_title():
    sql = """
        select title, count(distinct person_id) as writer_count
        from literator.book bb
        left join literator.write ww on bb.book_id = ww.book_id
        group by title
        order by writer_count desc, title
    """
    return utils.run_query(sql, engine)
def get_youngest_persons_name():
    # Youngest person = most recent date of birth, hence max() rather than min().
    sql = """
        with max_dob as
            (select max(date_of_birth) as date_of_birth
             from literator.person)
        select name
        from literator.person pp
        inner join max_dob on pp.date_of_birth = max_dob.date_of_birth
    """
    return utils.run_query(sql, engine)
def get_youngest_persons_name2():
    # Rank by date_of_birth descending so that row_no = 1 is the most recent
    # birth date, i.e. the youngest person.
    sql = """
        with ordered_age as
            (select name,
                    row_number() over (order by date_of_birth desc) as row_no
             from literator.person)
        select name
        from ordered_age
        where row_no = 1
    """
    return utils.run_query(sql, engine)
def fetch_data(self, state):
    dataframe = run_query(
        QUERY % {'state': NAMES_TO_CODES[state], 'year': YEAR},
        cache_key=('precipitation-%s' % NAMES_TO_CODES[state]))
    dataframe['date'] = pd.to_datetime(dataframe[['year', 'month', 'day']])
    dataframe['date_readable'] = dataframe['date'].apply(
        lambda x: x.strftime("%Y-%m-%d"))
    return dataframe
def get_writer_count_per_book_title():
    # distinct can also be used as a standalone function:
    # distinct(tables.Write.c.person_id) instead of
    # tables.Write.c.person_id.distinct(); the same holds for asc() and desc().
    stmt = select(
        [tables.Book.c.title,
         func.count(tables.Write.c.person_id.distinct()).label('writer_count')])\
        .select_from(
            tables.Book.join(
                tables.Write,
                tables.Book.c.book_id == tables.Write.c.book_id))\
        .group_by(tables.Book.c.title)\
        .order_by(desc('writer_count'), tables.Book.c.title.asc())
    # .order_by(func.count(tables.Write.c.person_id.distinct()).desc(),
    #           tables.Book.c.title.asc()) also works.
    return utils.run_query(stmt, engine)
def rate_restaurant():
    # SPARQL query used to check that the restaurant exists.
    query = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX ont: <http://schema.org/ontology/>

    SELECT ?name
    WHERE {
        <%s> foaf:name ?name
    }
    """
    while True:
        print('Type the url of the restaurant (or q to quit):')
        uri = input('> ').lower().strip()
        if uri in ('q', 'quit'):
            break
        try:
            uri = restaurant_uri(uri)
        except ValueError as e:
            print(e)
            continue
        df = run_query(query, uri)
        if not len(df):
            print("Error: the provided restaurant doesn't appear in our dataset")
            continue
        print(df.iloc[0, 0])
        print()
        print('Type a rating for this restaurant (or q to quit):')
        rating = input('> ').lower().strip()
        if rating in ('q', 'quit'):
            break
        try:
            rating = int(rating)
            if rating < 1 or rating > 5:
                raise ValueError('rating must be between 1 and 5')
        except ValueError as e:
            print('Error: %s' % e)
            continue
        MAIN_MENU.variables['user'][uri] = rating
        print('\nRating added to profile\n')
def get_top_k_brands_sold(k):
    # Guard against a missing or non-positive k.
    if k in nonelist or k < 1:
        return []
    records = run_query(
        "SELECT sold_plan.SoldPlanID, consumer_product.BrandID FROM sold_plan "
        "INNER JOIN consumer_product "
        "ON sold_plan.ConsumerProductID=consumer_product.ConsumerProductID")
    brandid_sold_array = [record['BrandID'] for record in records]
    k_most_common = Counter(brandid_sold_array).most_common(k)
    return [{'BrandID': brand, 'Count': count}
            for brand, count in k_most_common]
def restaurant_information():
    query = """
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX ont: <http://schema.org/ontology/>

    SELECT ?predicate ?object
    WHERE {
        <%s> ?predicate ?object
    }
    """
    while True:
        print('Type the url of the restaurant (or q to quit):')
        uri = input('> ').lower().strip()
        if uri in ('q', 'quit'):
            break
        try:
            uri = restaurant_uri(uri)
        except ValueError as e:
            print(e)
            continue
        df = run_query(query, uri)
        if not len(df):
            print("Error: the provided restaurant doesn't appear in our dataset")
            continue
        d = dict(zip(df.predicate, df.object))
        rest_cuisine = list(
            restaurant_cuisine[
                restaurant_cuisine.restaurant == uri
            ].cuisine.unique())
        print('=' * 50)
        print(d['foaf:name'])
        print('-' * 50)
        print('Mean rating:', d['ont:rating'])
        print('Cuisine types: %s' % ', '.join(rest_cuisine))
        if 'ont:address' in d:
            print('Address:', d['ont:address'])
        # The returned predicates carry the ont: prefix, so check for ont:phone.
        if 'ont:phone' in d:
            print('Phone:', d['ont:phone'])
        print('=' * 50)
        print()
def test_user_creation(self):
    query = """
    mutation myFirstMutation {
        createUser(email: "*****@*****.**",
                   firstName: "Joe",
                   lastName: "Doe") {
            user {
                id
            }
        }
    }
    """
    data = run_query(schema, query)
    # mock.ANY matches any generated id value (assumes `from unittest
    # import mock` at module level).
    expect = {
        "createUser": {
            "user": {
                "id": mock.ANY
            },
        }
    }
    self.assertDictContainsSubset(expect, data)
def test_post_creation(self):
    user = UserFactory()
    query = """
    mutation myFirstMutation {
        createPost(userId: "%s",
                   title: "Just do it",
                   content: "Yesterday you said tomorrow") {
            post {
                id
            }
        }
    }
    """ % user.id
    data = run_query(schema, query)
    # mock.ANY matches any generated id value (assumes `from unittest
    # import mock` at module level).
    expect = {
        "createPost": {
            "post": {
                "id": mock.ANY
            },
        }
    }
    self.assertDictContainsSubset(expect, data)
def test_make_comment(self):
    post = PostFactory()
    query = """
    mutation myFirstMutation {
        makeComment(postId: "%s",
                    name: "Just do it",
                    content: "Yesterday you said tomorrow") {
            post {
                id
            }
        }
    }
    """ % post.id
    data = run_query(schema, query)
    # mock.ANY matches any generated id value (assumes `from unittest
    # import mock` at module level).
    expect = {
        "makeComment": {
            "post": {
                "id": mock.ANY
            },
        }
    }
    self.assertDictContainsSubset(expect, data)
def get_average_plans_sold_per_week():
    records = run_query("select SoldPlanID, DateOfPurchase from sold_plan")
    start_date = min([record['DateOfPurchase'] for record in records])
    end_date = max([record['DateOfPurchase'] for record in records])
    start_date_num = dates.datestr2num(start_date.strftime('%Y%m%d'))
    end_date_num = dates.datestr2num(end_date.strftime('%Y%m%d'))
    date_range = [x for x in range(int(start_date_num), int(end_date_num + 1))]

    # Daily sales histogram over the full date range. Note that the average
    # below only uses the totals; see the sketch after this function for a
    # version that uses these per-day counts.
    daily_plan_sold_array = [0] * len(date_range)
    for record in records:
        date = record.get('DateOfPurchase')
        idx = int(dates.datestr2num(
            date.strftime('%Y%m%d'))) - int(start_date_num)
        daily_plan_sold_array[idx] += 1

    total_plans_sold = len(records)
    total_days = len(date_range)
    total_weeks = total_days / 7
    average_plans_sold_per_week = total_plans_sold / total_weeks
    return average_plans_sold_per_week
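# A hedged alternative sketch: the daily histogram built above already holds
# the per-day counts, so the same weekly average can be derived from it
# directly. This helper is an illustration, not part of the original module;
# it assumes numpy is available.
import numpy as np

def average_per_week_from_daily(daily_plan_sold_array):
    # Mean plans sold per day, scaled to a 7-day week; equals
    # total_plans_sold / total_weeks from the function above.
    return np.mean(daily_plan_sold_array) * 7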
def get_current_profile():
    query = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX ont: <http://schema.org/ontology/>

    SELECT ?name
    WHERE {
        <%s> foaf:name ?name
    }
    """
    user = MAIN_MENU.variables['user']
    if not user:
        print('User profile is empty!')
    rows = []
    for uri, rating in user.items():
        # In case the URIs do not belong to the database (they come from
        # TripAdvisor).
        uri = restaurant_uri(uri)
        df = run_query(query, uri)
        if not len(df):
            print("Error: %s doesn't appear in our dataset" % uri)
            continue
        name = df.iloc[0, 0]
        rows.append((name, rating))
    df = pd.DataFrame(rows, columns=['restaurant', 'rating'])
    if len(df):
        print(df)
    else:
        print('No ratings yet!')
def fetch_wsc_data(self, station):
    return run_query(station, cache_key=('hydrograph-%s' % station))
def fetch_data(self, state):
    dataframe = run_query(QUERY,
                          cache_key=('air-%s' % NAMES_TO_CODES[state]))
    dataframe['date_readable'] = dataframe['timestamp'].apply(
        lambda x: x.strftime("%H-%M-%S"))
    return dataframe
def get_fraction():
    # Fraction of service requests that were covered by a sold plan.
    query = ("SELECT a.on_plan_count/b.total_request FROM "
             "(SELECT COUNT(*) AS on_plan_count FROM consumer_servicerequest "
             "WHERE SoldPlanID != 0) a, "
             "(SELECT COUNT(*) AS total_request FROM consumer_servicerequest) b")
    result = run_query(query)
    return result
def get_longest_reading_session_length():
    stmt = select(
        [func.max(tables.Write.c.end_time - tables.Write.c.start_time)])
    return utils.run_query(stmt, engine)