def get_trip_info(tripID, nextStopID, seqq=0):
    """
    get the start time of a trip from the Bishop Peak feed
    :param tripID: trip id in the Bishop Peak format, a 6-digit number
    :param nextStopID: next stop id in the Bishop Peak format, a 6-digit number from 113120 to 114181
    :param seqq: optional flag; when nonzero, extra debug information is printed
    """
    if tripID != 0:
        url_trip = webloc + 'gtfs' + '&action=' + 'stopTimes' + agencyID + "&tripID=" + str(tripID)
        for i in range(3):
            try:
                stop_list = u.get_json_from_url(url_trip).get("stopTimes")
            except:
                print("error")
                print(url_trip)
                stop_list = []
                continue
            else:
                break
        if stop_list:
            start_time = stop_list[0].get("departureTime")
            # GTFS departure times past midnight can use hours >= 24; wrap them back into 0-23
            if int(start_time.split(':')[0]) >= 24:
                start_time = ('0' + str(int(start_time.split(':')[0]) - 24) + ':' +
                              start_time.split(':')[1] + ':' + start_time.split(':')[2])
            if seqq != 0:
                print(stop_list[0].get("stopID"), nextStopID, tripID)
            start_time2 = to_zero(start_time)
            return start_time, start_time2
        else:
            # in case the url cannot be reached
            return '0', '0'
def get_schedule():
    """
    Build a nested schedule dictionary from the NextBus schedule feed, keyed by
    agency, route tag, service class, direction, and block ID; each block maps
    to the list of first-stop times for its trips.
    """
    schedule = {}
    for a in route_id_to_NB_tag:
        schedule[a] = {}
        for b in route_id_to_NB_tag[a]:
            schedule[a][route_id_to_NB_tag[a][b]] = {}
            url_schedule = nextbus_url + 'schedule&a=' + a + '&r=' + route_id_to_NB_tag[a][b]
            json_schedule = u.get_json_from_url(url_schedule).get('route')
            for i in json_schedule:
                if i['serviceClass'] not in schedule[a][route_id_to_NB_tag[a][b]]:
                    schedule[a][route_id_to_NB_tag[a][b]][i['serviceClass']] = {}
                schedule[a][route_id_to_NB_tag[a][b]][i['serviceClass']][i['direction']] = {}
                blocks = schedule[a][route_id_to_NB_tag[a][b]][i['serviceClass']][i['direction']]
                for k in i.get('tr'):
                    if k.get('blockID') not in blocks:
                        blocks[k.get('blockID')] = [k.get('stop')[0].get('content')]
                    else:
                        blocks[k.get('blockID')].append(k.get('stop')[0].get('content'))
    return schedule
def topicpull(topic):
    json_data = utils.get_json_from_url(t_url)
    changes_to_pull = []
    for item in json_data:
        if item.get('topic') == topic:
            changes_to_pull.append(item.get('_number'))
    pullchange.pull_changes(changes_to_pull)
def get_nextbus_route_vl(agency, route, t=0):
    """
    Return a list of named tuples of AVL data for given route given the
    desired agency id, NB route tag, and last time info was obtained.
    :param agency: a str NextBus identifier of a transit agency
    :param route: a str NextBus identifier of a transit route for given agency
    :param t: Last time this function was called, in *msec Posix time*. Optional, default 0.
    :returns a list of VehiclePos named tuples.
    """
    url = nextbus_url + 'vehicleLocations&a=' + agency + '&r=' + route + '&t=' + str(t)
    try:
        vehiclelist = u.get_json_from_url(url).get('vehicle')
    except ValueError:
        vehiclelist = []
    vlist = []
    if vehiclelist:
        for vehicle in vehiclelist:
            vlist.append(get_nextbus_vehicle_data(agency, vehicle))
    return vlist
def get_vehicle():
    """
    get the vehicle list from the Bishop Peak API
    :return: list of vehicles currently operating
    """
    url_veh = apiloc + "vehicle" + "&action=" + "list" + agencyID
    veh_list = u.get_json_from_url(url_veh).get("vehicle")
    ve_clean_list = []
    unique = {}
    for i in veh_list:
        if i.get("routeID") > 1000:
            if i.get("vehicleName") not in unique:
                unique[i.get("vehicleName")] = i
            else:
                print i
                # keep only the most recently updated record for each vehicle name
                if i.get("updated") > unique[i.get("vehicleName")].get("updated"):
                    print unique[i.get("vehicleName")]
                    unique[i.get("vehicleName")] = i
                    print("unique update", i)
    for ve in unique.values():
        ve_clean_list.append(clean_up(ve))
    return ve_clean_list
def fetch(symbol, config):
    '''fetches stock data from the api and returns it as a pandas dataframe'''
    print('***fetching stock data for ' + symbol + '***')
    # fetch stock data for a symbol
    param_list = [
        'function=' + config['function'], 'symbol=' + symbol,
        'outputsize=' + config['output_size'], 'datatype=' + config['data_type'],
        'apikey=' + config['api_key']
    ]
    url = utils.url_builder(constants.BASEURL, param_list)
    json_data = utils.get_json_from_url(url)
    try:
        dataframe = pd.DataFrame(list(json_data.values())[1]).transpose()
    except IndexError:
        print(json_data)
        dataframe = pd.DataFrame()
    # keep only the alphabetic part of each column name
    pattern = re.compile('[a-zA-Z]+')
    dataframe.columns = dataframe.columns.map(lambda a: pattern.search(a).group())
    return dataframe
def topicpull(topic):
    json_data = utils.get_json_from_url(t_url)
    changes_to_pull = []
    for item in json_data:
        if item.get('topic') == topic:
            changes_to_pull.append(item.get('_number'))
    changes_to_pull.sort()
    pullchange.pull_changes(changes_to_pull)
def __get_abstract(self, identifier, identifier_type):
    url = "https://api.elsevier.com/content/abstract/" + identifier_type + "/" + identifier
    json_data = None
    try:
        json_data = utils.get_json_from_url(url, self.__get_header())
    except urllib2.HTTPError as e:
        print "Error getting abstract"
        utils.print_http_error(e)
        raise e
    return json_data
def update_wallet(self):
    # check which wallet transactions have been spent
    for wallet_utxo in self.get_wallet_utxos():
        utxo_status = get_json_from_url(
            self.outspend_template.format(wallet_utxo[0], wallet_utxo[1]))
        if utxo_status["spent"]:
            self.spend_wallet_utxo(wallet_utxo[0], wallet_utxo[1],
                                   utxo_status["txid"], utxo_status["vin"])
            spent_tx = get_json_from_url(self.tx_template.format(utxo_status["txid"]))
            block_timestamp = get_block_from_hash(utxo_status["status"]["block_hash"])
            # record any outputs of the spending transaction that pay back to the wallet address
            for idx, vout in enumerate(spent_tx["vout"]):
                if vout["scriptpubkey_address"] == "3EiAcrzq1cELXScc98KeCswGWZaPGceT1d":
                    self.insert_wallet_receieve(
                        utxo_status["txid"], idx, vout["value"],
                        utxo_status["status"]["block_hash"], block_timestamp)
    self.conn.commit()
def __get_affiliation(self, identifier, identifier_type):
    url = "https://api.elsevier.com/content/affiliation/" + identifier_type + "/" + identifier
    json_data = None
    try:
        json_data = utils.get_json_from_url(url, self.__get_header())
    except urllib2.HTTPError as e:
        print "Error retrieving affiliation information"
        utils.print_http_error(e)
        raise e
    return json_data
def get_all_tickers(self, from_file=True, debug=True):
    if from_file:
        df = pd.read_csv('./data/symbols.csv')
        ticker_list = df['symbol'].to_list()
        if debug:
            return ticker_list[:200]
        return ticker_list
    else:
        tickers_json = utils.get_json_from_url(self.ticker_url)
        df = pd.DataFrame.from_records(tickers_json['symbolsList'])
        return df['symbol'].to_list()
def extract_game(game_id):
    '''
    extracts game data from endpoint given a game id
    :return: dict representing game data
    '''
    url = GAME_ENDPOINT % game_id
    data = get_json_from_url(url)
    # NOTE: here, we can use the 'raw' marshmallow schemas to validate the data being
    # returned from an endpoint; this can help us catch if any new fields are added,
    # or if a field has unexpected values (null, incorrect datatype, etc)
    extracted_data = RawGame().load(data)
    return extracted_data
def get_nextbus_pred(agency, route, stop):
    """
    Return a dictionary of NextBus arrival predictions given the desired
    agency id, NB route tag, and NB stop tag.
    :param agency: a str NextBus identifier of a transit agency
    :param route: a str NextBus identifier of a transit route for given agency
    :param stop: a str NextBus identifier of a stop/station on given route
    :returns a dict representation of the JSON feed, listing arrival predictions
        for the given stop / vehicles on given route.
    """
    url = nextbus_url + 'predictions&a=' + agency + '&r=' + route + '&s=' + stop
    return u.get_json_from_url(url)
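# A minimal usage sketch for get_nextbus_pred, added for illustration only (it is
# not code from the original project): the agency, route, and stop tags below are
# hypothetical placeholders, and the field access assumes the
# 'predictions' -> 'direction' -> 'prediction' layout that get_predict_from_url
# later in this listing also relies on.
def print_arrival_minutes(agency='example-agency', route='10', stop='12345'):
    feed = get_nextbus_pred(agency, route, stop) or {}
    predictions = feed.get('predictions') or {}
    direction = predictions.get('direction') or {}
    entries = direction.get('prediction', [])
    if isinstance(entries, dict):
        # a single prediction may come back as a dict rather than a list
        entries = [entries]
    for prediction in entries:
        print(prediction.get('minutes'))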
def get_journal_metrics(self, issn, initial_year=1900, end_year=2100):
    url = ''.join(("https://api.elsevier.com/content/serial/title?issn=" + issn,
                   "&view=", self.VIEW_STANDARD.type,
                   "&date=" + str(initial_year) + "-" + str(end_year)))
    json_data = None
    try:
        json_data = utils.get_json_from_url(url, self.__get_header())
    except urllib2.HTTPError as e:
        print "Error retrieving journal metrics -> " + url
        utils.print_http_error(e)
        raise e
    return json_data
def populate_financial_indicators(self, year):
    # Initialize bookkeeping lists and a 2D numpy array of zeros
    # (one row per ticker, one column per indicator)
    missing_tickers, missing_index = [], []
    d = np.zeros((len(self.tickers), len(self.indicators)))

    for t, _ in enumerate(tqdm(self.tickers)):
        url0 = utils.attach_api_key_to_url(self.income_statement_url + self.tickers[t], self.api_key)
        url1 = utils.attach_api_key_to_url(self.balance_sheet_statement_url + self.tickers[t], self.api_key)
        url2 = utils.attach_api_key_to_url(self.cash_flow_statement_url + self.tickers[t], self.api_key)
        url3 = utils.attach_api_key_to_url(self.financial_ratios_url + self.tickers[t], self.api_key)
        url4 = utils.attach_api_key_to_url(self.company_key_metrics_url + self.tickers[t], self.api_key)
        url5 = utils.attach_api_key_to_url(self.financial_statement_growth_url + self.tickers[t], self.api_key)

        a0 = utils.get_json_from_url(url0)
        a1 = utils.get_json_from_url(url1)
        a2 = utils.get_json_from_url(url2)
        a3 = utils.get_json_from_url(url3)
        a4 = utils.get_json_from_url(url4)
        a5 = utils.get_json_from_url(url5)

        # Combine all json files in a list, so that it can be scanned quickly
        combined = [a0, a1, a2, a3, a4, a5]
        all_dates = utils.find_in_json(combined, 'date')

        check = [s for s in all_dates if year in s]  # find all entries for the requested year
        if len(check) > 0:
            date_index = all_dates.index(check[0])  # use the most recent entry for that year, if more are present

            for i, _ in enumerate(self.indicators):
                ind_list = utils.find_in_json(combined, self.indicators[i])
                try:
                    d[t][i] = ind_list[date_index]
                except:
                    d[t][i] = np.nan  # in case there is no value inserted for the given indicator
        else:
            missing_tickers.append(self.tickers[t])
            missing_index.append(t)

    # drop tickers with no data for the requested year, then build the raw dataset
    actual_tickers = [x for x in self.tickers if x not in missing_tickers]
    d = np.delete(d, missing_index, 0)
    raw_data = pd.DataFrame(d, index=actual_tickers, columns=self.indicators)
    return raw_data
def tickers_by_sectors_to_csv(self):
    tickers_sector = []
    available_tickers = self.get_all_tickers(from_file=True, debug=False)
    for t in tqdm(available_tickers):
        url = utils.attach_api_key_to_url(self.profile_url + t, self.api_key)
        d = utils.get_json_from_url(url)
        tickers_sector.append(d['profile']['sector'])
    df = pd.DataFrame(tickers_sector, index=available_tickers, columns=['Sector'])
    df.index.names = ['Symbol']
    df.to_csv('./data/sectors.csv')
def get_predict_from_url(agency, r, tag):
    url = nextbus_url + 'predictions&a=' + agency + '&r=' + r + '&s=' + tag
    try:
        pre_dict = u.get_json_from_url(url).get('predictions')
    except:
        print "no prediction for this stop"
        logger.info("no prediction for this stop")
        pre_dict = []
    # pre_dict may be None, empty, or missing 'direction' when there are no predictions
    if pre_dict and 'direction' in pre_dict:
        p = pre_dict.get('direction').get('prediction')
    else:
        p = []
        print "no prediction"
        logger.info("no prediction")
    return p
def __init__(self, api_key, choice=None, tunnel_url=None, tunnel_port=None):
    if tunnel_url and tunnel_port:
        ssh_tunnel.init_proxy(tunnel_url, tunnel_port)
    self.api_key = api_key
    url = 'https://api.elsevier.com/authenticate?platform=SCOPUS'
    if choice:
        url += '&choice=' + choice
    header = {
        'Accept': 'application/json',
        'X-ELS-APIKey': api_key
    }
    try:
        res = utils.get_json_from_url(url, header)
        self.token = res['authenticate-response']['authtoken']
    except urllib2.HTTPError as e:
        print "Error authenticating"
        utils.print_http_error(e)
        exit(-1)
def extract_game_ids_for_date(date):
    '''
    extracts game ids from the schedule, which are then passed to the game
    endpoint for extracting individual games
    :param date: date to be queried
    :return: list of nhl game ids for that date
    '''
    # pull raw schedule info from endpoint
    logger.info('Getting schedule info for %s' % date)
    url = SCHEDULE_ENDPOINT + '?startDate=%s&endDate=%s' % (date, date)
    data = get_json_from_url(url)

    # iterate through the schedule data to get the game ids for every game
    game_ids = []
    for day in data.get("dates", []):
        for game in day.get("games", []):
            game_ids.append(game["gamePk"])
    return game_ids
def get_multiplestop_pred(agency, route, stop_list):
    """
    Since fetching each url takes time, it is faster per iteration to fetch the
    predictions for several stops at once. This function builds the url for a
    multi-stop prediction request and returns a dictionary of NextBus arrival
    predictions given the desired agency id, NB route tag, and stop list.
    :param agency: a str NextBus identifier of a transit agency
    :param route: a str NextBus identifier of a transit route for given agency
    :param stop_list: a list of stops on the given route
    :returns a dict representation of the JSON feed, listing arrival predictions
        for the given stops / vehicles on given route.
    """
    url = nextbus_url + 'predictionsForMultiStops&a=' + agency
    for stop in stop_list:
        if stop not in stopdic_tag.get(agency).get(route).keys():
            continue
        stop_tag = stopdic_tag.get(agency).get(route).get(stop)
        url = url + '&stops=' + route + '|' + stop_tag
    return u.get_json_from_url(url)
def get_ticker_by_sector(self, sector, from_file=True, debug=True):
    if from_file:
        df = pd.read_csv('./data/sectors.csv', index_col='Symbol')
        tickers_selected = df[df['Sector'] == sector].index.values.tolist()
        if debug:
            return tickers_selected[:200]
        return tickers_selected
    else:
        tickers_sector = []
        available_tickers = self.get_all_tickers()
        for t in tqdm(available_tickers):
            url = utils.attach_api_key_to_url(self.profile_url + t, self.api_key)
            d = utils.get_json_from_url(url)
            tickers_sector.append(utils.find_in_json(d, 'sector'))
        df = pd.DataFrame(tickers_sector, index=available_tickers, columns=['Sector'])
        tickers_selected = df[df['Sector'] == sector].index.values.tolist()
        return tickers_selected
def pull_one_change(raw_change_no):
    change_no = utils.segmentise_change_no(raw_change_no)[0]
    rev_no = utils.segmentise_change_no(raw_change_no)[1]
    url = p_url + change_no + "?o=ALL_REVISIONS"
    json_data = utils.get_json_from_url(url)
    proj_name = json_data.get('project')
    change_sub = json_data.get('subject')
    dir_path = utils.change_projname_to_dirpath(proj_name)

    print("Pulling change . . . ")
    print(Col.ylw + str(change_no) + "\t" + Col.grn + change_sub + Col.rst)
    print(" onto directory " + Col.pnk + dir_path + Col.rst)

    rev_numbers = []
    rev_branches = []
    rev_urls = []
    revisions = json_data.get("revisions")
    for revision in revisions:
        rev_numbers.append(json_data["revisions"][revision].get("_number"))
        rev_branches.append(json_data["revisions"][revision]['fetch']['anonymous http']['ref'])
        rev_urls.append(json_data["revisions"][revision]['fetch']['anonymous http']['url'])

    rev_index = select_rev_index(rev_numbers, int(rev_no))
    cherry_pick_change(dir_path, rev_urls[rev_index], rev_branches[rev_index])
def get_eta(routeID, stopID):
    """
    get the estimated arrival time for a stop
    :return: the first ETA (in epoch time) for the given routeID and stopID,
        or an empty list if the url cannot be reached
    """
    url_eta = webloc + "eta" + "&action=" + "list" + agencyID + "&routeID=" + str(routeID) + "&stopID=" + str(stopID)
    for i in range(3):
        try:
            eta_list = u.get_json_from_url(url_eta).get("stop")
        except:
            eta_list = []
            print("eta error", url_eta)
            continue
        else:
            break
    if eta_list:
        return eta_list[0].get("ETA1")
    else:
        return eta_list
def fetch(indicator, symbol, config):
    '''fetches indicator data for a stock from the api and returns it as a pandas dataframe'''
    print("fetching indicator " + indicator + " for " + symbol)
    # fetch indicator data for the symbol
    params = [
        'function=' + indicator, 'symbol=' + symbol,
        'interval=' + config['interval'], 'time_period=' + config['time_period'],
        'series_type=' + config['series_type'], 'apikey=' + config['api_key']
    ]
    url = utils.url_builder(constants.BASEURL, params)
    json_data = utils.get_json_from_url(url)
    try:
        dataframe = pd.DataFrame(list(json_data.values())[1]).transpose()
    except IndexError:
        dataframe = pd.DataFrame()
    return dataframe
def show_list(url):
    json_data = utils.get_json_from_url(url)
    for item in json_data:
        print(Col.ylw + str(item.get("_number")) + Col.rst + "\t" + item.get("subject"))
        print(" " + Col.grn + item.get("project") + Col.rst + "\n")
def tickers_to_csv(self):
    tickers_json = utils.get_json_from_url(self.ticker_url)
    df = pd.DataFrame.from_records(tickers_json['symbolsList'])
    df.to_csv('./data/symbols.csv', index=False)
def scopus_search(self, query, view=VIEW_STANDARD, suppress_nav_links=False,
                  date=None, start=0, count=25, field=None, sort=None):
    if not isinstance(query, Query):
        print("Query parameter must be set and should be an instance of Query class. Exiting...")
        exit(-1)
    if not isinstance(view, View):
        print("View parameter must be an instance of inner View class. Check attributes starting with View_* in "
              "ElsClient object. Program will exit...")
        exit(-1)
    if not isinstance(suppress_nav_links, bool):
        print("suppress_nav_links parameter should be either True or False. Exiting...")
        exit(-1)

    query_quoted = urllib.quote_plus(query.get_query())
    url = "https://api.elsevier.com/content/search/scopus?" \
          "view=" + view.type + \
          "&query=" + query_quoted + \
          "&suppressNavLinks=" + str(suppress_nav_links).lower()
    if date:
        url += "&date=" + date
    if field:
        url += "&field=" + field
    url += "&start=" + str(start) + "&count=" + str(count)

    if sort:
        if not isinstance(sort, list) and not isinstance(sort, tuple):
            print "Sort parameter must be either a list or tuple of a maximum of 3 Sort elements. Program will exit..."
            exit(-1)
        if len(sort) > 3:
            print "Sort parameter has a maximum of 3 elements. Program will exit..."
            exit(-1)
        sort_types = []
        for s in sort:
            if not isinstance(s, Sort):
                print("All elements of sort parameter must be of Sort class. Check attributes starting with Sort_* "
                      "in ElsClient object. Program will exit...")
                exit(-1)
            sort_types.append(s.type)
        url += "&sort=" + ",".join(sort_types)

    json_data = None
    try:
        json_data = utils.get_json_from_url(url, self.__get_header())
    except urllib2.HTTPError as e:
        print "Error while retrieving information from SCOPUS:"
        utils.print_http_error(e)
        raise e
    return json_data
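# Every example in this listing calls a get_json_from_url helper (referenced as
# u.get_json_from_url, utils.get_json_from_url, or called directly) whose
# definition is not shown. The following is only a minimal sketch of what such a
# helper might look like, using the standard library; it is an assumption for
# readability, not any of the projects' actual implementations (some of which
# also pass a headers argument, handled here as an optional parameter).
import json
import urllib.request


def get_json_from_url(url, headers=None):
    # Fetch the url (with optional extra HTTP headers) and decode the body as JSON.
    request = urllib.request.Request(url, headers=headers or {})
    with urllib.request.urlopen(request) as response:
        return json.loads(response.read().decode('utf-8'))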