def get_projetos(self, list_palavras_chaves):
    """
    Returns a list of integers with all pls ids

    Args
    -------
    list_palavras_chaves: list of strings -> All keywords from all subjects

    Returns
    --------
    list of integers -> list of integers with all pls ids
    """
    ids_projetos = []
    encoded_url = self.build_pls_request_url(list_palavras_chaves)
    projetos_json = utils.get_request(encoded_url).json()
    page_req = self.ultima_pagina_requisicao(projetos_json)
    try:
        last_page = page_req["page"]
        last_page_req = page_req["request"]
    except TypeError:
        return
    else:
        request_projetos = self.ultimo_get_requests(last_page, last_page_req)
        for proj_req in request_projetos:
            get_request = utils.get_request(proj_req)
            json_projeto = get_request.json()
            for projeto in json_projeto["dados"]:
                ids_projetos.append(projeto["id"])
        return ids_projetos

def main():
    for k in range(15, 50):
        prop_resp = get_request(f'[{{"action":"addProperty","title":"prop: {k}","type":"text"}}]')
        prop_id = get_id(prop_resp)
        for i in range(1, 150):
            action = f'[{{"action": "addVariant", "property_id": {int(prop_id)}, "title": "item {i}"}}]'
            resp_json = get_request(action)
            item_id = get_id(resp_json)
            print(f'prop: {k}; item: {i}')
            if item_id == 0:
                break
            time.sleep(0.3)
        print('=' * 120)

def get_data_from_grobid(command, pdf_file):
    """Send a POST request to GROBID and return the response data"""
    return utils.get_request("{}{}".format(GROBID_SERVER, command),
                             POST=True,
                             att_file={'input': pdf_file},
                             timeout=30,
                             data={"timeout": 300})

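# Hedged usage sketch (added for illustration, not from the original source):
# assumes GROBID_SERVER points at a running instance (e.g. "http://localhost:8070/")
# and that "api/processFulltextDocument" is the command wanted; "paper.pdf" is a
# hypothetical path.
def _example_grobid_fulltext():
    with open("paper.pdf", "rb") as pdf_file:
        response = get_data_from_grobid("api/processFulltextDocument", pdf_file)
    return response.text  # TEI XML produced by GROBID
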
def scrap_content(url):
    res = get_request(url)
    soup = to_soup(res)
    content = soup.find("div", {"id": "inner-block"})
    content = remove_attributes(content)
    return clear_content(content)

def get_clip_names(self):
    """
    Given a camera's alias string, a unix start timestamp in milliseconds
    and a unix end timestamp in milliseconds, return a list with clip names
    within that period.
    """
    endpoint = '/cameras/recording/{0}?'.format(self.alias) + \
               'startDate={0}&endDate={1}&allowPartialMatch=true'.format(
                   self.metadata['startTimestampInMs'],
                   self.metadata['endTimestampInMs'])
    response = utils.get_request(config.RECORDINGS_URL, endpoint)
    if response:
        for clip in response:
            try:
                clip_name = clip['recordingUrl']
                self.clips.append(utils.get_basename_str(clip_name))
            except KeyError as e:
                logger.error('{0} returned {1}'.format(endpoint, e))
        logger.info('Retrieved clip(s): {0}'.format(self.clips))
    else:
        logger.info('No clips were found for {}'.format(endpoint))

def get_schools_stats(year):
    """
    :param year: string or int
    :return: pandas.DataFrame containing statistics for each team for a given year
    """
    base_url = "https://www.sports-reference.com/cbb/seasons/"
    url = base_url + str(year) + '-school-stats.html'
    r = get_request(url, headers={"User-Agent": "Mozilla/5.0"})
    if r is None:
        return None
    soup = BeautifulSoup(r.text, 'lxml')
    table = soup.find_all('table')[0]
    hrefs = table.find_all('a', href=True)
    link_names = []
    for href in hrefs:
        link_names.append(href['href'].split('/')[3])
    data = parse_table(table)
    columns = get_table_header(table, index=1)
    df = pd.DataFrame(data, index=np.arange(1, len(data) + 1), columns=columns)
    df['NCAA'] = [el.endswith('NCAA') for el in df[df.columns[0]]]
    df[df.columns[0]] = df[df.columns[0]].str.replace('NCAA', '').str.strip()
    df['Link names'] = link_names
    return df

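# Hedged usage sketch (added, not from the original source): the season and the
# filter on the 'NCAA' flag column are illustrative.
def _example_schools_stats():
    df = get_schools_stats(2019)
    if df is not None:
        return df[df['NCAA']]  # schools whose name carried the NCAA marker
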
def mfold_result(task_id, zipname="now.zip", path="./results/mfold/", verbose=True): """Gets mfold result via task_id Args: task_id: Id of task which was given by RESTful API zipname: Name of zip file in which client will save results. After save this file is removed path: Path where results should be stored verbose: Bool which tells if function should print what she actualy does Returns: None """ req = get_request("mfold_result", task_id) with open(zipname, "wb") as f: for chunk in req.iter_content(): f.write(chunk) unzip(zipname, path) os.remove(zipname) if verbose: print("Result under: {}/\n".format(path + task_id))
def get_team_schedule(team, year):
    """
    :param team: string
    :param year: string or int
    :return: pandas.DataFrame containing the schedule of the team for the given year
    """
    base_url = "https://www.sports-reference.com/cbb/schools/"
    url = base_url + team + '/' + str(year) + '-schedule.html'
    r = get_request(url, headers={"User-Agent": "Mozilla/5.0"})
    if r is None:
        return None
    soup = BeautifulSoup(r.text, 'lxml')
    for caption in soup.find_all('caption'):
        if caption.get_text() == 'Schedule and Results Table':
            table = caption.find_parent('table')
            data = parse_table(table)
            columns = get_table_header(table)
            df = pd.DataFrame(data, index=np.arange(1, len(data) + 1),
                              columns=columns)
            return df

def get_url_deputado(self, json_projeto, relator=False):
    """
    Returns camara api url for deputy request, together with the pl id

    Args
    -------
    json_projeto: dict -> json returned on specific pl request
    relator: boolean -> indicates if the url to be returned is from a reporter

    Returns
    --------
    tuple (string, string) -> camara api url for deputy request and the pl id
    """
    proposicao = str(json_projeto["dados"]["id"])
    if relator:
        url_deputado = (
            json_projeto["dados"]["statusProposicao"]["uriUltimoRelator"])
    else:
        url_autores_pl = (constants.URL_API_CAMARA +
                          f"proposicoes/{proposicao}/autores")
        req_autores_pl = utils.get_request(url_autores_pl)
        json_autores_pl = req_autores_pl.json()
        url_deputado = json_autores_pl["dados"][0]["uri"]
    return url_deputado, proposicao

def from_existing(cls, droplet_id, authentication_token):
    droplet = utils.get_request("droplets/{0}".format(droplet_id),
                                authentication_token)
    if not droplet.ok:
        raise Exception("Droplet not created: could not retrieve data of the "
                        "existing node! Message returned: {0}".format(
                            droplet.json()["message"]))
    return cls(droplet.json()["droplet"], authentication_token)

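# Hedged usage sketch (added, not from the original source): assumes this
# classmethod lives on a droplet wrapper class, here hypothetically named
# Droplet; the id and token are placeholders.
#
#   droplet = Droplet.from_existing(12345, "my-digitalocean-token")
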
def getMPInfo(self):
    """Method queries theyworkforyou again, for the full info on a given
    member of parliament"""
    url = ('https://www.theyworkforyou.com/api/getMPInfo?key=%s&id=%s&output=js'
           % (theyworkyou_apikey, self.person_id))
    request = get_request(url=url, user=None, headers={})
    # literal-eval the JSON response body into a Python object
    self.full_info = ast.literal_eval(request.content)

def get_sizes(self):
    get_sizes_response = utils.get_request("sizes", self.authentication_token)
    if not get_sizes_response.ok:
        raise Exception("Could not get sizes! See error message: {0}".format(
            get_sizes_response.json()["message"]))
    return [Size(size_data) for size_data in get_sizes_response.json()["sizes"]]

def get_regions(self):
    get_regions_response = utils.get_request("regions", self.authentication_token)
    if not get_regions_response.ok:
        raise Exception("Could not get regions! See error message: {0}".format(
            get_regions_response.json()["message"]))
    return [Region(region_data) for region_data in get_regions_response.json()["regions"]]

def get_domain_records(self, domain_name):
    get_domain_records_response = utils.get_request(
        "domains/{0}/records".format(domain_name), self.authentication_token)
    if not get_domain_records_response.ok:
        raise Exception("Failed to get domain records! See error message: {0}".format(
            get_domain_records_response.json()["message"]))
    return [DomainRecord(domain_record_data)
            for domain_record_data in get_domain_records_response.json()["domain_records"]]

def get_keys(self):
    get_keys_response = utils.get_request("account/keys", self.authentication_token)
    if not get_keys_response.ok:
        raise Exception("Could not get ssh keys! See error message: {0}".format(
            get_keys_response.json()["message"]))
    return [Key(key_data, self.authentication_token)
            for key_data in get_keys_response.json()["ssh_keys"]]

def from_existing(cls, domain_name, authentication_token):
    get_domain_response = utils.get_request("domains/{0}".format(domain_name),
                                            authentication_token)
    if not get_domain_response.ok:
        raise Exception("Failed to retrieve data for domain! See error message: {0}".format(
            get_domain_response.json()["message"]))
    return cls(get_domain_response.json()["domain"], authentication_token)

def save_senado_project(self, projetos, keywords, ong):
    """
    Saves pl from the senate in the database

    Args
    -----------
    projetos: list of strings -> All projects
    keywords: list of string -> All keywords from all subjects
    ong: dict -> Data from ong
    """
    for projeto in projetos:
        db_data = {}
        id_projeto = projeto['id']
        proj_req = utils.get_request(constants.URL_API_SENADO +
                                     f"materia/{id_projeto}").json()
        ementa = utils.get_from_dict(self.campos_banco["ementa"], proj_req)
        # ementa = (proj_req['DetalheMateria']
        #           ['Materia']
        #           ["DadosBasicosMateria"]
        #           ["EmentaMateria"])
        try:
            codigo_situacao_pl = (
                proj_req['DetalheMateria']['Materia']['SituacaoAtual']
                ['Autuacoes']['Autuacao']['Situacao']['CodigoSituacao'])
        except TypeError:
            situacoes = (proj_req['DetalheMateria']['Materia']
                         ['SituacaoAtual']['Autuacoes'])
            codigo_situacao_pl = (
                situacoes['Autuacao'][0]['Situacao']['CodigoSituacao'])
        situacao_arquivada = self.get_codigo_pl_arquivado()
        senador = Senador()
        if (utils.search_keyword(ementa, keywords)
                and situacao_arquivada != codigo_situacao_pl):
            json_autor = senador.get_dados_autor(proj_req, id_projeto)
            dados_pl = self.get_dados_pl(proj_req, id_projeto,
                                         projeto['data'], ong["Name"])
            dados_relator = senador.get_dados_relator(id_projeto)
            db_data.update(dados_pl)
            db_data.update(json_autor)
            db_data.update(dados_relator)
            el_data = db_data
            utils.save_projeto_to_db(db_data)
            pl_datetime = datetime.strptime(el_data['data'], "%d/%m/%Y")
            el_data['data'] = datetime.strftime(pl_datetime, "%Y/%m/%d")
            el_data['tags_ementa'] = utils.get_tags_from_string(ementa)
            el_data['tags_tramitacao'] = utils.get_tags_from_string(
                dados_pl["tramitacao"])
            el_data['keywords'] = utils.get_ementa_keyword(keywords, ementa)
            del el_data['_id']
            constants.es.index(index='projects', doc_type='project',
                               body=el_data)

def get_action(self, action_id):
    get_action_response = utils.get_request(
        "images/{0}/actions/{1}".format(self.id, action_id),
        self.authentication_token)
    if not get_action_response.ok:
        raise Exception("Failed to get action! See error message: {0}".format(
            get_action_response.json()["message"]))
    return Action(get_action_response.json()["action"])

def _get_list_of_data(self, class_of_data, name_of_data):
    response = utils.get_request(name_of_data, self.authentication_token)
    if not response.ok:
        raise Exception("Error when retrieving list of {0}! See error message: {1}".format(
            name_of_data, response.json()["message"]))
    return [class_of_data(data_object, self.authentication_token)
            for data_object in response.json()[name_of_data]]

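# Hedged note (added, not from the original source): list endpoints whose URL
# path matches the JSON payload key can be routed through this helper, e.g.
# (assuming a Droplet class whose constructor takes the data dict and token):
#
#   def get_droplets(self):
#       return self._get_list_of_data(Droplet, "droplets")
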
def from_existing(cls, image_id, authentication_token):
    get_image_response = utils.get_request("images/{0}".format(image_id),
                                           authentication_token)
    if not get_image_response.ok:
        raise Exception("Could not retrieve specified Image! See error message: {0}".format(
            get_image_response.json()["message"]))
    return cls(get_image_response.json()["image"], authentication_token)

def from_existing(cls, key_id, authentication_token):
    get_key_response = utils.get_request("account/keys/{0}".format(key_id),
                                         authentication_token)
    if get_key_response.ok:
        return cls(get_key_response.json()["ssh_key"], authentication_token)
    else:
        raise Exception("Could not get ssh key information! See error message: {0}".format(
            get_key_response.json()["message"]))

def get_list_of_executions(self, offset):
    params = {
        "zqlQuery": "project=%s AND cycleName='%s'" % (self._project_name,
                                                       self._cycle_name),
        "offset": offset
    }
    result = utils.get_request(utils.ZapiCalls.GET_ZQL_SEARCH, params)
    return result.json()

def get_tasks_data(tasks_url: str) -> list:
    tasks = get_request(url=tasks_url)
    if tasks:
        tasks_data = list()
        for task in tasks:
            if len(task) > 1:  # the API may return an invalid object
                if len(task['title']) > 48:
                    task['title'] = task['title'][:48] + '...'
                tasks_data.append(task)
        return tasks_data

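# Hedged usage sketch (added, not from the original source): the URL is
# illustrative, pointing at a JSONPlaceholder-style API that returns a list of
# todo objects with a 'title' field.
def _example_tasks():
    return get_tasks_data("https://jsonplaceholder.typicode.com/todos")
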
def get_submissions(kwargs):
    '''
    Keyword arguments are fed into this function, which builds a URL based on
    the arguments and then makes a GET request to that URL
    :param kwargs:
    :return: JSON response
    '''
    search_subreddit_url = 'http://api.pushshift.io/reddit/search/submission/'
    url = build_url(search_subreddit_url, kwargs)
    response = get_request(url)
    return jsonify(response.json())

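# Hedged usage sketch (added, not from the original source): assumes build_url
# serializes the dict into a pushshift query string; the parameter names below
# are common pushshift ones but illustrative here.
#
#   get_submissions({"subreddit": "python", "size": 25, "sort": "desc"})
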
def patched_company(patch):
    """ get a patched company """
    companies_house_user = '******'
    headers = {}
    company_number = patch.split('/')[-1]
    url = 'https://api.companieshouse.gov.uk/company/%s' % (company_number)
    request = get_request(url=url, user=companies_house_user, headers=headers)
    data = request.json()
    return data

def getPerson(self):
    """Method to set more variables from the theyworkforyou getPerson endpoint"""
    url = ('https://www.theyworkforyou.com/api/getPerson?key=%s&id=%s&output=js'
           % (theyworkyou_apikey, self.person_id))
    request = get_request(url=url, user=None, headers={})
    # literal-eval the JSON response body into a Python object
    self.person = ast.literal_eval(request.content)
    self.person = self.person[0]
    self.first_name = self.person['given_name'].decode('latin-1').encode("utf-8")
    self.last_name = self.person['family_name'].decode('latin-1').encode("utf-8")

def get_files(self):
    # get files from the server
    if self.job == lawyer:
        # request lawyer files
        self.f1 = utils.get_request(utils.SERVER_ADDRESS + "profile/lawyer")
    else:
        # request doctor files
        self.f1 = utils.get_request(utils.SERVER_ADDRESS + "profile/doctor")
    if self.sex == female:
        # request vacation files
        self.f2 = utils.get_request(utils.SERVER_ADDRESS + "profile/female")
    else:
        # request puppy files
        self.f2 = utils.get_request(utils.SERVER_ADDRESS + "profile/male")
    b64 = lambda x: base64.b64decode(x)
    self.f1 = b64(self.f1)
    self.f2 = b64(self.f2)

def main(group_name7):
    df2 = pd.read_excel('./File/' + date_XX_XX_XXXX + '/Альбомы для ВК.xlsx')
    # sel = df2[df2['Группа'] == group_name7]
    for group_name, df in df2.groupby('Группа'):
        if group_name != group_name7:
            continue
        print(f'Process group {group_name}')
        # create album
        album = api.market.addAlbum(title=group_name, owner_id=owner_id)
        time.sleep(0.3)
        album_id = album.market_album_id
        property_response = get_request(
            f'[{{"action":"addProperty","title":"Размер","type":"text"}}]',
            hash=csrf_hash, group_id=group_id)
        prop_id = get_id(property_response)
        sizes = df['Размер'].map(lambda x: str(x).split(','))
        sizes = {str(x_i).strip() for x in sizes for x_i in x}
        sizes -= {''}
        sizes_dict = dict()
        for size in sizes:
            action = f'[{{"action": "addVariant", "property_id": {int(prop_id)}, "title": "{size}"}}]'
            resp_json = get_request(action, hash=csrf_hash, group_id=group_id)
            item_id = get_id(resp_json)
            sizes_dict[size] = item_id
            if item_id == 0:
                print(f"ERROR in album {group_name} -- too many sizes")
                break
        download_all_photo(df)
        upload_market_items(album_id, df, sizes_dict)

def get_alias(self):
    """
    Given a camera's cameraId string, calls the cameras-service API to
    retrieve the camera's alias.
    """
    endpoint = '/cameras/{}'.format(self.metadata['cameraId'])
    try:
        self.alias = utils.get_request(config.CAM_SERVICES_URL, endpoint)['alias']
        logger.debug('Cam alias retrieved: {}'.format(self.alias))
    except (KeyError, TypeError) as e:
        logger.error('{0} returned {1}'.format(endpoint, e))

def get_fin_stat_links():
    """
    Get URLs of new financial statement news from the Zagreb Stock Exchange.
    Source: https://zse.hr/default.aspx?id=36774&ticker1=&Page=1
    """
    url = 'https://zse.hr/default.aspx?id=36774&ticker1=&Page=1'
    res = get_request(url)
    bs = BeautifulSoup(res.text, features="lxml")
    link = [a['href'] for a in bs.find_all("a", href=True)]
    link = ["http://www.zse.hr/" + l for l in link if "UserDocsImages/financ" in l]
    dtime = [date.get_text().strip() for date in bs.select('.vijestRowDatumDat')]
    dtime = pd.to_datetime(dtime)
    return dtime, link

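# Hedged usage sketch (added, not from the original source): pairing each
# publication date with its statement link.
def _example_fin_stat_links():
    dtime, links = get_fin_stat_links()
    return list(zip(dtime, links))
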
def get_projetos(self, keywords):
    projetos = []
    try:
        for codigo in keywords:
            yesterday = date.today() - timedelta(days=1)
            weekday = yesterday.weekday()
            days_const = 1
            # if it's monday, get friday's pls
            if weekday == self.day_of_week["dom"]:
                days_const = 3
            req = utils.get_request(
                self.api_url + "materia/atualizadas?"
                f"codAssuntoEspecifico={codigo}&numdias="
                f"{self.dias_requisicao + days_const}").json()
            materias = (
                req['ListaMateriasAtualizadas']['Materias']['Materia'])
            for materia in materias:
                try:
                    pl_date = (materia["AtualizacoesRecentes"]
                               ["Atualizacao"]["DataUltimaAtualizacao"])
                except TypeError:
                    # "Atualizacao" may be a list; take the most recent entry
                    pl_date = (
                        materia["AtualizacoesRecentes"]["Atualizacao"][-1]
                        ["DataUltimaAtualizacao"])
                pl_datetime = datetime.strptime(pl_date, "%Y-%m-%d %H:%M:%S")
                pl_date_str = datetime.strftime(pl_datetime, "%d/%m/%Y")
                projetos.append({
                    "id": materia["IdentificacaoMateria"]["CodigoMateria"],
                    "data": pl_date_str
                })
    except KeyError:
        print("There are no pls in this date range for this topic")
        sys.exit(1)
    else:
        return projetos

def get_elastic_submissions(kwargs):
    '''
    Keyword arguments are fed into this function, which builds a URL based on
    the arguments and then makes a GET request to that URL
    :param kwargs:
    :return: JSON response
    '''
    # Example query:
    # https://elasticsearch.pushshift.io/?q="Carrie Fisher" AND score:>100&sort=created_utc:desc&size=100
    search_subreddit_url = 'https://elastic.pushshift.io/rs/submissions/_search/'
    url = build_elastic_url(search_subreddit_url, kwargs)
    response = get_request(url)
    return jsonify(response.json())

def generate_for_instance(self, instance, action):
    audit_trail = self.model(
        content_type=ContentType.objects.get_for_model(instance),
        object_id=instance.id,
        object_repr=unicode(instance),
        action=action
    )
    request = get_request(['user', 'META'])
    if request:
        if request.user.is_authenticated():
            audit_trail.user = request.user
        audit_trail.user_ip = (request.META.get('HTTP_X_FORWARDED_FOR', None)
                               or request.META.get('REMOTE_ADDR'))
    audit_trail.save()
    return audit_trail

def get_codigo_pl_arquivado(self):
    """
    Returns API id that identifies pls that have already been archived

    Returns
    ---------
    str -> API id that identifies pls that have already been archived
    """
    codigos_situacoes = utils.get_request(constants.URL_API_SENADO +
                                          "materia/situacoes").json()
    codigo_situacao = (
        codigos_situacoes['ListaSituacoes']['Situacoes']['Situacao'])
    for item in codigo_situacao:
        if re.match('arquivada', item['Descricao'].lower()):
            codigo_arquivado = item['Codigo']
    return codigo_arquivado

def get_sonny_moore_rating(year):
    """
    Scrape Sonny Moore's computer power index of a certain year

    :param year: string or int
    :return: pandas.DataFrame containing Sonny Moore's ratings for a given year
    """
    # only the last two digits of the year are used in the url
    if len(str(year)) > 2:
        year = str(year)[-2:]
    url = 'http://sonnymoorepowerratings.com/cb' + str(year) + '.htm'
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) "
                      "AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A"
    }
    r = get_request(url, headers=headers)
    if r is None:
        return None
    soup = BeautifulSoup(r.text, 'lxml')
    header = soup.find_all('font')[1].text.split('\n')[1]
    columns = header.split()
    table = soup.find_all('b')[0]
    table = str(table.string)
    table = table.split('\n')
    data = []
    n = len(columns) - 1
    for i in table:
        row = i.split()
        if row:
            el = []
            name = ' '.join(row[1:-n])
            el.append(name)
            el.extend(row[-n:])
            data.append(el)
    df = pd.DataFrame(data=data, columns=columns)
    return df

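# Hedged usage sketch (added, not from the original source): a four-digit and a
# two-digit year reach the same page, since only the last two digits are kept.
#
#   ratings = get_sonny_moore_rating(2019)  # same as get_sonny_moore_rating(19)
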
def request_projeto(self, ids_projeto, palavras_chaves, ong_name):
    """
    Make pls requests and save them to database

    Args
    -------
    ids_projeto: list of integers -> list of integers with all pls ids
    palavras_chaves: list of strings -> All keywords from all subjects
    ong_name: str -> Ong name
    """
    req_id = constants.URL_API_CAMARA + "proposicoes/{}"
    try:
        for id_projeto in ids_projeto:
            request_str = req_id.format(id_projeto)
            req_projeto = utils.get_request(request_str)
            json_projeto = req_projeto.json()
            descricao_situacao = (json_projeto["dados"]["statusProposicao"]
                                  ["descricaoSituacao"])
            ementa = json_projeto["dados"]["ementa"]
            if descricao_situacao:  # if not None
                if (constants.PL_ARQUIVADO != descricao_situacao
                        and utils.search_keyword(ementa, palavras_chaves)):
                    db_data = self.build_projeto_dict(json_projeto, ong_name)
                    el_data = db_data
                    utils.save_projeto_to_db(db_data)
                    pl_datetime = datetime.datetime.strptime(
                        el_data['data'], "%d/%m/%Y")
                    el_data['data'] = datetime.datetime.strftime(
                        pl_datetime, "%Y/%m/%d")
                    el_data['tags_ementa'] = utils.get_tags_from_string(ementa)
                    el_data['tags_tramitacao'] = (
                        utils.get_tags_from_string(db_data["tramitacao"]))
                    el_data['keywords'] = utils.get_ementa_keyword(
                        palavras_chaves, ementa)
                    del el_data['_id']
                    constants.es.index(index='projects', doc_type='project',
                                       body=el_data)
    except TypeError:
        return

def get_users_data(users_url: str) -> list:
    users = get_request(url=users_url)
    if users:
        users_data = list()
        for user in users:
            if len(user) > 1:  # the API may return an invalid object
                users_data.append({
                    'id': user.get('id'),
                    'name': user.get('name'),
                    'username': user.get('username'),
                    'email': user.get('email'),
                    'company_name': user.get('company').get('name'),
                })
        return users_data

def get_assunto(self, keywords):
    """
    Returns API subject codes for keywords received by parameter

    Args
    -------
    keywords: list of string -> All keywords from all subjects

    Returns
    --------
    list of string -> API subject codes for keywords received by parameter
    """
    codigos_assuntos = []
    assunto_json = utils.get_request(constants.URL_API_SENADO +
                                     "materia/assuntos").json()
    for assunto in assunto_json["ListaAssuntos"]["Assuntos"]["Assunto"]:
        if utils.search_keyword(assunto["AssuntoEspecifico"], keywords):
            codigos_assuntos.append(assunto["Codigo"])
    return codigos_assuntos

def get_ken_pomeroys_rating(year):
    """
    :param year: string or int
    :return: pandas.DataFrame containing Ken Pomeroy's ratings for a given year
    """
    base_url = "https://kenpom.com/index.php?y="
    url = base_url + str(year)
    r = get_request(
        url,
        headers={
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) "
                          "AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A"
        })
    if r is None:
        return None
    soup = BeautifulSoup(r.text, 'lxml')
    table = soup.find_all('table')[0]
    data = parse_table(table)
    columns = get_table_header(table, index=1)
    data = np.array(data)
    cleaned_data = []
    mask = [1, 2, 3, 4, 5, 7, 9, 11, 13, 15, 17, 19]
    for i in data:
        cleaned_data.append(i[mask])
    df = pd.DataFrame(cleaned_data, index=np.arange(1, len(data) + 1),
                      columns=columns)
    df[df.columns[0]] = df[df.columns[0]].str.strip('0123456789 ')
    return df

def __init__(self, queries, query_type='companies', limit='20', headers={}):
    """Query companies house"""
    # remove duplicate names to query
    queries = [x.lower() for x in queries]
    queries = list(set(queries))
    self.data = []
    self.matched_officers = []
    self.matched_persons = []
    record_count = 0
    for query in queries:
        url = ('https://api.companieshouse.gov.uk/search/%s?q=%s&items_per_page=%s'
               % (query_type, query.lower(), limit))
        self.url = url.replace(' ', '+')
        request = get_request(url=self.url, user=companies_house_user,
                              headers=headers)
        data = request.json()
        if 'items' in data:
            data = data['items']
        else:
            data = []
        record_count += len(data)
        # skip duplicate records already collected
        for d in data:
            self_links = [i['links']['self'] for i in self.data]
            if d['links']['self'] not in self_links:
                self.data.append(d)

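# Hedged usage sketch (added, not from the original source): assumes this
# __init__ belongs to a search wrapper class, here hypothetically named
# CompaniesHouseSearch, and that companies_house_user holds a valid API key.
#
#   search = CompaniesHouseSearch(["Acme Ltd", "Globex"], query_type="companies", limit="10")
#   print(len(search.data))  # de-duplicated search hits
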
def mfold_result(task_id, zipname="now.zip", path="./results/mfold/", verbose=True): """Gets mfold result via task_id Args: task_id: Id of task which was given by RESTful API zipname: Name of zip file in which client will save results. After save this file is removed path: Path where results should be stored verbose: Bool which tells if function should print what she actualy does Returns: None """ req = get_request("mfold_result", task_id) with open(zipname, "wb") as f: for chunk in req.iter_content(): f.write(chunk) unzip(zipname, path) os.remove(zipname) if verbose: print("Result in: {}/".format(path + task_id))
def _get_data(self, type_of_data):
    response = utils.get_request("droplets/{0}/{1}".format(self.id, type_of_data),
                                 self.authentication_token)
    return response.json()

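# Hedged usage sketch (added, not from the original source): the DigitalOcean
# v2 API exposes per-droplet collections such as "kernels", "snapshots",
# "backups" and "actions", so a caller might use the helper like:
#
#   kernels = droplet._get_data("kernels")["kernels"]
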
def from_existing(cls, domain_name, record_id, authentication_token):
    get_domain_record_response = utils.get_request(
        "domains/{0}/records/{1}".format(domain_name, record_id),
        authentication_token)
    if not get_domain_record_response.ok:
        raise Exception("Failed to retrieve domain record! See error message: {0}".format(
            get_domain_record_response.json()["message"]))
    # assumed: the API returns the record under "domain_record", mirroring the
    # sibling from_existing constructors above
    return cls(get_domain_record_response.json()["domain_record"],
               authentication_token)