def register_requests(self):
    while self.results.qsize() != self.n_jobs:  # TODO: change this condition into an event
        job = self.jobs.get()
        worker = self.workers.get()
        try:
            log.info('assigning job {} to worker {}'.format(job.id, worker))
            with open(job.file_path, 'rb') as data_file:
                requests_retry_session(RETRY_NUM).post(
                    url=worker,
                    data=data_file,
                    headers={
                        'job_id': job.id,
                        'start_with': job.start_with
                    })
            with lock:
                self.in_flights[job.id] = JobAssignment(job, worker, time.time())
                self.worker_statuses[worker] = 0
        except Exception as e:
            # e.message was removed in Python 3; formatting the exception works everywhere
            log.warning('worker {} did not reply to job {}, error msg is {}'.format(
                worker, job.id, e))
            self.jobs.put(job)
def heartbeats(self):
    while self.results.qsize() != self.n_jobs:
        with lock:
            for worker in self.worker_statuses:
                try:
                    requests_retry_session(retries=RETRY_NUM).get(worker + '/heartbeat')
                    self.worker_statuses[worker] = 0
                except Exception as e:
                    # the original format string was missing a placeholder for the error
                    log.warning(
                        'worker {} does not respond to heartbeat message, err msg is {}'
                        .format(worker, e))
                    self.worker_statuses[worker] += 1
        time.sleep(3)
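Every snippet in this section leans on a `requests_retry_session` helper that the excerpts never define. A minimal sketch, assuming the widely used urllib3 `Retry` + `HTTPAdapter` pattern (the project's actual defaults may differ):

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry


def requests_retry_session(retries=3, backoff_factor=0.3,
                           status_forcelist=(500, 502, 504), session=None):
    # a sketch, not the project's implementation: reuse a session if given,
    # otherwise create one, and mount a retrying adapter on both schemes
    session = session or requests.Session()
    retry = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,  # sleep grows roughly as backoff_factor * 2**n
        status_forcelist=status_forcelist,
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session

With the adapter mounted, every `session.get`/`session.post` in the snippets retries transient 5xx responses and connection failures with exponential backoff, which is consistent with how the callers pass `retries`, `backoff_factor`, `status_forcelist`, and `session`.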
def advanced_search(search_str):
    """
    Search PubMed for a systematic review or meta-analysis containing
    search_str in the title or abstract, and return matching PMIDs.
    """
    base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    r = utils.requests_retry_session().get(
        base_url,
        params={
            'db': 'pubmed',
            'term': '(systematic review[ti] OR meta analysis[ti]) AND ' + search_str + '[tiab]',
            'format': 'json',
            'retmax': 300000,
            'email': eutils_email,
            'tool': eutils_tool,
            'api_key': eutils_key
        })
    # avoid shadowing the json module with the parsed response
    json_data = r.json()
    pmids = json_data['esearchresult']['idlist']
    if pmids:
        matches = crud.get_reviews_with_ids(pmids)
        print(matches)
        return matches if matches else None
    return None
def download_file(url, save_dir):
    req = utils.requests_retry_session().get(url, stream=True)
    if req.status_code == 200:
        if 'content-disposition' in req.headers:
            content_disposition = req.headers['content-disposition']
            filename_pattern = 'filename="(.*?)"'
            filename = re.findall(filename_pattern, content_disposition)[0]
            filename = filename.replace('/', '_')
        else:
            content_type = req.headers['content-type']
            filename, ext = content_type.rsplit('/', 1)
            filename = f'{filename.replace("/", "_")}.{ext}'
        save_path = Path(save_dir) / filename
        # derive numbered names from the original path so suffixes don't pile up
        # (stem2, stem3, ... rather than stem2, stem23, ...)
        original_path = save_path
        idx = 1
        while save_path.exists():
            idx += 1
            new_filename = original_path.stem + str(idx) + original_path.suffix
            save_path = original_path.parent / new_filename
        print(f'Will save to: {save_path}')
        with open(save_path, 'wb') as f:
            for chunk in req.iter_content(1024):
                f.write(chunk)
        print(f'Download completed: {save_path}')
        if filename.endswith('zip'):
            print(f'Unzipping {save_path}')
            with zipfile.ZipFile(save_path) as zf:
                zf.extractall(save_dir)
            print('Completed; deleting the zip file.')
            save_path.unlink()
    else:
        print(f'Got {req.status_code} while downloading file.')
    return req.status_code
def __init__(self,
             base_url=None,
             auth=None,
             fhir_version=FHIR_VERSION,
             status_endpoint=None):
    self.logger = logging.getLogger(type(self).__name__)
    self.base_url = base_url
    self.status_endpoint = status_endpoint
    self.auth = auth
    self.fhir_version = fhir_version
    self.session = requests_retry_session()
def __init__(self, client_token):
    self.client_token = client_token
    self.session = requests_retry_session()
    self.headers = None
    self.accounts = None
    self.get_access_token()
    self.get_accounts()
def get_page(user_id, page_token=None):
    params = {'key': config.API_KEY}
    if page_token:
        params['pageToken'] = page_token
    api_url = config.API_URL.format(user_id=user_id)
    req = utils.requests_retry_session().get(api_url, params=params)
    json_data = req.json()
    next_page_token = json_data.get('nextPageToken')
    return json_data['items'], next_page_token
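A hypothetical pagination loop built on `get_page`, showing how the `nextPageToken` protocol is meant to be consumed (the `get_all_items` name is illustrative, not part of the source):

def get_all_items(user_id):
    # keep requesting pages until the API stops returning a nextPageToken
    items, token = get_page(user_id)
    while token:
        page_items, token = get_page(user_id, page_token=token)
        items.extend(page_items)
    return items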
def send_result_to_master(self):
    while not to_stop_server.is_set():
        result = self.results.get()
        try:
            log.info('sending back {}'.format(result.job_id))
            log.debug('result content is {}'.format(result.content))
            requests_retry_session(RETRY_NUM).post(
                MASTER_URL,
                data=json.dumps({
                    'worker': 'http://{}:{}'.format(local_ip, PORT_NUM),
                    'job_id': result.job_id,
                    'content': result.content,
                }))
        except Exception as e:
            # use the module logger consistently, and e instead of e.message
            log.error('Cannot send result {} to master {}. Error msg is {}'.format(
                result, MASTER_URL, e))
def send_request(metrics, size):
    if size == 0:
        return None
    query = 'http://localhost:4242/api/query?start={}'.format(START_TIME)
    counter = 0
    for metric in metrics:
        if counter < size:
            query += '&m=zimsum:0all-count-nan:{}'.format(metric)
            counter += 1
        else:
            break
    return requests_retry_session(retries=5).get(query)
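For illustration, `send_request` appends one `m=` parameter per metric to a single OpenTSDB `/api/query` URL; a hypothetical call (the metric names are made up):

# builds http://localhost:4242/api/query?start=<START_TIME>
#   &m=zimsum:0all-count-nan:sys.cpu.user&m=zimsum:0all-count-nan:sys.cpu.sys
resp = send_request(['sys.cpu.user', 'sys.cpu.sys'], size=2)
if resp is not None:
    print(resp.status_code)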
def download_media(album_url, save_dir):
    print(f'Connecting to {album_url}')
    if 'youtu' in album_url:
        print('Trying to download from youtube, using you-get')
        subprocess.run(['you-get', album_url, '-o', str(save_dir)])
        return 999
    album_req = utils.requests_retry_session().get(album_url)
    if album_req.status_code == 200:
        pattern = r'(http.*?video-downloads\.googleusercontent\.com.*?)"'
        download_url = re.findall(pattern, album_req.text)[0]
        print(f'Extracted download URL: {download_url}')
        Path(save_dir).mkdir(parents=True, exist_ok=True)
        status_code = download_file(download_url, save_dir)
        if status_code != 200:
            print('Got failure; going into fallback mode (dirty)')
            try:
                parsed = lxml.html.document_fromstring(album_req.text)
                script_tags = parsed.cssselect('script')
                json_data = None
                for script_tag in script_tags:
                    # script_tag.text can be None; guard before substring checks
                    if (script_tag.text and 'AF_initDataCallback' in script_tag.text
                            and "key: 'ds:0'" in script_tag.text):
                        json_data_str = re.findall('return (.*?)}}',
                                                   script_tag.text,
                                                   flags=re.S)[0]
                        json_data = json.loads(json_data_str)
                        break
                if json_data is None:
                    return 404
                url_tups = list(json_data[-1][-1][-1][-1].values())[-1][-1]
                max_res = 0
                url = None
                for url_tup in url_tups:
                    res = url_tup[1] * url_tup[2]
                    if res > max_res:
                        max_res = res
                        url = url_tup[3]
                print(f'Extracted dirty url {url}')
                new_status_code = download_file(url, save_dir)
                if new_status_code == 200:
                    return 888
                return new_status_code
            except AttributeError:
                # Possibly due to broken video. Just return 777,
                # and let's download the thumbnail only...
                return 777
    else:
        print(f'Got {album_req.status_code} while extracting URL.')
        status_code = album_req.status_code
    return status_code
def parse(idx, url, retries=None, backoff=None, timeout=None, session=None):
    try:
        session = requests_retry_session(retries,
                                         backoff,
                                         status_forcelist=status_forcelist,
                                         session=session)
        response = session.get(url.format(idx), timeout=timeout, verify=False)
        jsdata = Box(json.loads(response.text))
        if not jsdata.success:
            raise ParseError('Status success is not true')
        return jsdata.obj
    except (ConnectionError, HTTPError):
        raise NetworkError(f'Failed to process {idx}')
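`parse` raises two custom exceptions that this excerpt never defines; a minimal sketch of what they are assumed to look like:

class ParseError(Exception):
    """The response arrived but its payload did not report success."""


class NetworkError(Exception):
    """The request itself failed even after the retry session gave up."""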
def download(self, image, filename=None):
    """
    :param str image:
    :param str filename:
    :return:
    """
    if filename is None:
        filename = os.path.basename(image)
    local = os.path.join(self.path, filename)
    if os.path.exists(local):
        print('Skipping ' + filename)
        return
    if image.startswith('//'):
        # protocol-relative URL: default to http
        image = 'http:' + image
    resp = utils.requests_retry_session().get(image)
    with open(local, 'wb') as f:
        f.write(resp.content)
def pubmed_convert_id(known_id, desired_id):
    """
    Convert from PMID to DOI or vice versa.
    @param known_id: ID we have
    @param desired_id: type of ID we desire ('pmid' or 'doi', matching the
        lowercase keys in the converter's JSON records)
    @return: desired ID or None
    """
    base_url = 'https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/'
    r = utils.requests_retry_session().get(base_url,
                                           params={
                                               'ids': known_id,
                                               'format': 'json',
                                               'email': eutils_email,
                                               'tool': eutils_tool,
                                               'api_key': eutils_key
                                           })
    if r.status_code == 200:
        data = r.json()
        if desired_id in data['records'][0]:
            return data['records'][0][desired_id]
    return None
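A hypothetical round trip through the converter (the PMID is the one used in NCBI's own ID-converter examples):

doi = pubmed_convert_id('23193287', 'doi')   # the article's DOI, or None if unmapped
pmid = pubmed_convert_id(doi, 'pmid') if doi else None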
def __init__(self, srv, show_id):
    """
    :param PageDownloader srv:
    :param show_id:
    """
    threading.Thread.__init__(self)
    self.service = srv
    self.id = show_id

def run(self):
    self.service.download(self.id)


if __name__ == '__main__':
    service = PageDownloader(utils.requests_retry_session(), 'nyato',
                             'https://www.nyato.com/manzhan/{}/')
    start = int(input('Start ID:'))
    end = int(input('End ID:'))
    if start > end or start <= 0:
        print('Start - End is invalid')
        exit(1)
    # end + 1 so the final ID is included, matching the sibling scripts
    chunks = utils.make_chunks(range(start, end + 1), 10)
    for chunk in chunks:
        pool = []
        for show_id in chunk:
            thread = CollectThread(service, show_id)
            thread.start()
            pool.append(thread)
        for thread in pool:
            thread.join()
def __init__(self, srv, show_id):
    """
    :param PageDownloader srv:
    :param show_id:
    """
    threading.Thread.__init__(self)
    self.service = srv
    self.id = show_id

def run(self):
    self.service.download(self.id)


if __name__ == '__main__':
    service = PageDownloader(utils.requests_retry_session(),
                             int(input('Event ID:')))
    start = int(input('Start ID:'))
    end = int(input('End ID:'))
    if start > end or start <= 0:
        print('Start - End is invalid')
        exit(1)
    chunks = utils.make_chunks(range(start, end + 1), 10)
    for chunk in chunks:
        pool = []
        for show_id in chunk:
            thread = CollectThread(service, show_id)
            thread.start()
            pool.append(thread)
        for thread in pool:
            thread.join()
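Both downloader scripts above call `utils.make_chunks`, which is not shown in this excerpt; a plausible sketch, assuming it simply splits an iterable into lists of at most `size` items:

def make_chunks(iterable, size):
    """Yield successive lists of at most `size` items from `iterable`."""
    chunk = []
    for item in iterable:
        chunk.append(item)
        if len(chunk) == size:
            yield chunk
            chunk = []
    if chunk:
        # emit the final, possibly shorter, chunk
        yield chunk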
        print('{}: {}'.format(show_id, response.content))


class CollectThread(threading.Thread):
    def __init__(self, srv, show_id):
        """
        :param ShowInfoDownloader srv:
        :param show_id:
        """
        threading.Thread.__init__(self)
        self.service = srv
        self.id = show_id

    def run(self):
        self.service.download(self.id)


if __name__ == '__main__':
    service = ShowInfoDownloader(
        utils.requests_retry_session(), 'json',
        'https://show.bilibili.com/api/ticket/project/get?version=133&id={}')
    start = int(input('Start ID:'))
    end = int(input('End ID:'))
    if start > end or start <= 0:
        print('Start - End is invalid')
        exit(1)
    while start <= end:
        service.download(start)
        start += 1
def main():
    start_with = sys.argv[1] if len(sys.argv) > 1 else ""
    workers = config.workers_url()
    start_time = time.time()
    log.info('master starts, start_with is "{}", workers are {}'.format(
        start_with, workers))
    with tempdir() as tmp_dir:
        all_metrics_file_name = 'all_metric_names.tmp'
        with open('{}/{}'.format(tmp_dir, all_metrics_file_name),
                  'w') as all_metrics_file:
            get_all_metrics_into_file(start_with, all_metrics_file)
        parts = split_file_into_parts(file_path=all_metrics_file.name,
                                      num_parts=len(workers) * WORKER_JOB_NUM,
                                      output_dir=tmp_dir)
        log.info('partition finishes, all jobs are: {}'.format(parts))
        jobs = [Job(start_with, part) for part in parts]

        # master preparation
        master = MasterServer(workers, jobs)

        # setup threads
        listening = Thread(target=master.server.serve_forever)
        listening.daemon = True
        sending = Thread(target=master.register_requests)
        sending.daemon = True
        checking = Thread(target=master.scan_requests)
        checking.daemon = True
        heartbeat = Thread(target=master.heartbeats)
        heartbeat.daemon = True

        listening.start()
        log.info('master server starts up, listening on port {}'.format(PORT_NO))
        sending.start()
        checking.start()
        heartbeat.start()

        # waiting for results from workers
        results = []
        while len(results) < len(jobs):
            try:
                result = master.results.get(timeout=MAX_JOB_INTERVAL)
            except Empty:
                log.error('master waited too long for result, shutting down')
                exit(1)
            results.append(result)

        # all work done, shutdown servers
        for worker in workers:
            try:
                log.info('sending shutdown to worker {}'.format(worker))
                requests_retry_session(RETRY_NUM).get(worker + '/shutdown')
            except Exception as e:
                log.error('unable to stop worker {}, error message is {}'.format(
                    worker, e))
        master.server.shutdown()
        master.server.socket.close()
        log.info('master server shutdown, beginning aggregation')

        # start reducing phase
        merged, to_expands, total_number = reducer.start(results, start_with)
        put_to_tsdb(start_with, merged, to_expands)
        log.info('one round master aggregation finished, to_expands are {}'.format(
            to_expands))
        expand(to_expands, tmp_dir,
               '{}/{}'.format(tmp_dir, all_metrics_file_name), total_number)
    log.info('finished! total running time is {}'.format(time.time() - start_time))
def login(self, user, passwd, retries=15):
    """
    Tries to log in to Stack Overflow and returns the requests session.
    ++ This function logs with logging.getLogger(__name__) ++
    user: email or user
    passwd: password
    retries: max number of tries to login
    """
    logger = logging.getLogger(__name__)
    logger.info(f'Trying to login with max of {retries} retries')
    url = 'https://stackoverflow.com/users/login?ssrc=head&returnurl=https://stackoverflow.com/'
    # host = 'stackoverflow.com'
    # referer = 'https://stackoverflow.com/'
    backoff_factor = 3
    self.s = requests_retry_session(retries=retries,
                                    backoff_factor=backoff_factor,
                                    session=self.s)
    r = self.s.get(url)
    # logger.debug(f'GetRequest({url}) -> Response==[{r.text}]==')
    soup = BeautifulSoup(r.text, 'html.parser')
    form = soup.select('form[id="login-form"]')
    form = form[0] if len(form) > 0 else None
    if form:
        actionurl = urljoin('https://stackoverflow.com', form.attrs['action'])
    else:
        actionurl = url
    hinputs = form.select('input[type="hidden"]') if form else []
    post_data = {
        i.attrs['name']: i.attrs['value']
        for i in hinputs
        if all((i.attrs.get('name', False), i.attrs.get('value', False)))
    }
    logger.debug(f'form[{form.attrs if form is not None else form}]; '
                 f'actionurl[{actionurl}]; post_data[{post_data}]')
    post_data.update({'email': user, 'password': passwd})
    self.s = requests_retry_session(retries=retries,
                                    backoff_factor=backoff_factor,
                                    session=self.s)
    while True:
        try:
            r = self.s.post(actionurl, data=post_data, allow_redirects=False)
            if r.is_redirect:
                logger.debug(f'Got redirected to {r.headers["Location"]}')
            else:
                # no Location header without a redirect; log all headers instead
                logger.debug(f'No redirect. Headers[{r.headers}]')
            if r.cookies.get('uauth', False):
                logger.info('Logged in successfully. Notifying...')
                logger.debug(f'Cookies[{r.cookies}]')
                IFTTnotify(self.event_name, self.key, ('[StackOBot]', 'Login ok'))
                return True
            elif r.url.startswith('https://stackoverflow.com/nocaptcha'):
                logger.warning('Got a CAPTCHA. Trying in a minute. Notifying...')
                IFTTnotify(self.event_name, self.key, ('[StackOBot]', 'Got captcha'))
                sleep(60)
                continue
            else:
                logger.error(f"Didn't get uauth cookie, instead [{r.cookies}]")
                logger.debug(f'GetRequest({actionurl}) -> '
                             f'Response==[{b64encode(r.content)}]==')
                raise RuntimeError
        except Exception as e:
            logger.critical("Couldn't login. Notifying...")
            IFTTnotify(self.event_name, self.key, ('[StackOBot]', 'Login FAILED'))
            logger.exception(e)
            return False
from utils import write_to_db
from utils import write_to_db_update
from utils import delete_rows
from utils import requests_retry_session
import pandas as pd
import janitor

if __name__ == '__main__':
    # get trade data for today
    today = pd.Timestamp.today().strftime('%Y-%m-%d')
    formData = {
        "selDatum": today,
        "btnSave": "Show",
        "IsItPregledPovijest": "yes",
        "rbScope": "svi"
    }
    response = requests_retry_session().post(
        'https://zse.hr/default.aspx?id=26523', data=formData)
    tblByDate = pd.read_html(response.text,
                             attrs={'id': 'dnevna_trgovanja'},
                             thousands=".",
                             decimal=',')[0]
    if len(tblByDate.index) == 0:
        print("There are no trade data for ZSE.")
    else:
        # clean table
        tblByDate = janitor.clean_names(tblByDate)
        tblByDate = tblByDate.rename(columns={
            'ticker': 'symbol',
            'change_%': 'change'
        })
        tblByDate['change'] = tblByDate['change'].str.extract(r'(.\d+,\d+)')
        tblByDate['change'] = tblByDate['change'].str.replace(',', '.')
import requests
import os
import json
import time

from utils import get_valid_filename, requests_retry_session

api_base = 'https://mobileapi.jumbo.com/v5'
jumbo_products_path = 'products/jumbo'
session = requests_retry_session()


def fetch_product(product_id):
    url = '{}/products/{}'.format(api_base, product_id)
    r = session.get(url)
    r.raise_for_status()
    return r.json()['product']['data']


def fetch_products(size=100):
    offset = 0
    products = []
    while True:
        url = '{}/products?count={}&offset={}'.format(api_base, size, offset)
        try:
            r = session.get(url)
            r.raise_for_status()
def update_anime(type, metadata, media, force):
    result = JSON.ObjectFromString(get_anime(metadata.id))
    anime = result['data']['Page']['media'][0]
    has_mal_page = True

    # Get episode data
    if Prefs['episode_support']:
        try:
            mal_episodes = get_episodes(str(anime['idMal']))
        except:
            has_mal_page = False
            Log.Error('Error: Show has no episode data: ' + metadata.id)
    else:
        has_mal_page = False

    # Genres
    if metadata.genres is None or force:
        metadata.genres.clear()
        try:
            metadata.genres = anime['genres']
        except:
            Log.Error('Error: Show has no genres: ' + metadata.id)

    # Rating
    if metadata.rating is None or force:
        try:
            metadata.rating = float(anime['averageScore']) / 10
        except:
            Log.Error('Error: Show has no rating: ' + metadata.id)

    # Title
    if metadata.title is None or force:
        title_language = Prefs['title_language']
        for language in anime['title']:
            if language == title_language:
                metadata.title = anime['title'][language]
                break
            metadata.title = anime['title'][language]

    # Posters
    if metadata.posters is None or force:
        try:
            poster = Proxy.Media(requests_retry_session().get(
                anime['coverImage']['extraLarge'],
                verify=certifi.where()).content)
            metadata.posters[anime['coverImage']['extraLarge']] = poster
        except:
            Log.Error('Error: Show has no posters: ' + metadata.id)

    # Summary
    if metadata.summary is None or force:
        try:
            h = HTMLParser()
            cleanr = re.compile('<.*?>')
            metadata.summary = re.sub(cleanr, '', h.unescape(anime['description']))
        except:
            Log.Error('Error: Show has no summary: ' + metadata.id)

    # Country
    if metadata.countries is None or force:
        try:
            if anime['countryOfOrigin'] == 'JP':
                metadata.countries = ['Japan']
            elif anime['countryOfOrigin'] == 'CN':
                metadata.countries = ['China']
            elif anime['countryOfOrigin'] == 'KR':
                metadata.countries = ['Korea']
            else:
                metadata.countries = ['Unknown, please report']
        except:
            Log.Error('Error: Show has no country of origin: ' + metadata.id)

    # Start Date
    if metadata.originally_available_at is None or force:
        try:
            metadata.originally_available_at = datetime(
                anime['startDate']['year'], anime['startDate']['month'],
                anime['startDate']['day'])
        except:
            Log.Error('Error: Show has no start date: ' + metadata.id)

    # Studio
    if metadata.studio is None or force:
        try:
            metadata.studio = anime['studios']['edges'][0]['node']['name']
        except:
            Log.Error('Error: Show has no studio: ' + metadata.id)

    if metadata.roles is None or force:
        metadata.roles.clear()

    # Characters
    if metadata.roles is None or force:
        try:
            for character in anime['characters']['edges']:
                # Create new role
                role = metadata.roles.new()
                # Get correct VA
                for VA in character['voiceActors']:
                    # Set VA Name
                    try:
                        role.name = VA['name']['full']
                    except:
                        pass
                    # Set VA Photo
                    try:
                        role.photo = VA['image']['large']
                    except:
                        pass
                    # Set Character Name
                    try:
                        role.role = character['node']['name']['full']
                    except:
                        pass
        except:
            Log.Error('Error: Show has no Characters: ' + metadata.id)

    # Staff
    if metadata.roles is None or force:
        try:
            for staff in anime['staff']['edges']:
                # Create new role
                role = metadata.roles.new()
                # Set Staff Name
                try:
                    role.name = staff['node']['name']['full']
                except:
                    pass
                # Set Staff Photo
                try:
                    role.photo = staff['node']['image']['large']
                except:
                    pass
                # Set Staff Role
                try:
                    role.role = staff['role']
                except:
                    pass
        except:
            Log.Error('Error: Show has no staff: ' + metadata.id)

    # Reviews
    if metadata.reviews is None or force:
        metadata.reviews.clear()
        try:
            for review in anime['reviews']['edges']:
                # Create new review
                r = metadata.reviews.new()
                # Set Review Author
                try:
                    r.author = review['node']['user']['name']
                except:
                    pass
                # Set Review Source
                r.source = 'AniList'
                # Set Review Image
                try:
                    if int(review['node']['score']) >= 50:
                        r.image = 'rottentomatoes://image.review.fresh'
                    else:
                        r.image = 'rottentomatoes://image.review.rotten'
                except:
                    pass
                # Set Review Link
                try:
                    r.link = review['node']['siteUrl']
                except:
                    pass
                # Set Review Text
                try:
                    r.text = review['node']['summary']
                except:
                    pass
        except:
            Log.Error('Error: Show has no reviews: ' + metadata.id)

    # TV Specific
    if type == 'tv':
        # Banners
        if metadata.banners is None or metadata.art is None or force:
            try:
                banner_hash = base64.b64encode(str(anime['bannerImage']))
                banner = Proxy.Media(requests_retry_session().get(
                    anime['bannerImage'],
                    verify=certifi.where()).content)
            except:
                Log.Error('Error: Show has no banners: ' + metadata.id)
        if metadata.banners is None or force:
            metadata.banners[banner_hash] = banner
        if metadata.art is None or force:
            metadata.art[banner_hash] = banner
        # Episodes
        if Prefs['episode_support'] and has_mal_page and 1 in media.seasons:
            update_episodes(media, metadata, force, mal_episodes)

    # Movie Specific
    if type == 'movie':
        if metadata.art is None or force:
            try:
                banner_hash = base64.b64encode(str(anime['bannerImage']))
                banner = Proxy.Media(requests_retry_session().get(
                    anime['bannerImage'],
                    verify=certifi.where()).content)
                metadata.art[banner_hash] = banner
            except:
                Log.Error('Error: Show has no banners: ' + metadata.id)
        # Year
        if metadata.year is None or force:
            try:
                metadata.year = anime['startDate']['year']
            except:
                Log.Error('Error: Show has no start date: ' + metadata.id)
        # Roles
        if metadata.roles is None or force:
            # Staff
            try:
                for staff in anime['staff']['edges']:
                    # Director
                    try:
                        if staff['role'] == 'Director':
                            director = metadata.directors.new()
                            director.name = staff['node']['name']['full']
                    except:
                        pass
            except:
                Log.Error('Error: Show has no staff: ' + metadata.id)
def search_anime(type, results, media, lang):
    search_term = media.show if type == 'tv' else media.name
    search_year = media.year
    if media.year is None:
        search_year = 'null'
    query = '''
    query ($id: Int, $perPage: Int, $search: String, $seasonYear: Int) {
        anime: Page(perPage: $perPage) {
            pageInfo {
                total
            }
            results: media(type: ANIME, id: $id, search: $search, seasonYear: $seasonYear) {
                id
                title {
                    romaji
                }
                startDate {
                    year
                }
            }
        }
    }
    '''
    if Prefs['search_type'] == 'id-name':
        variables = '''{
            "id": "''' + re.match('^([0-9]*?) ', search_term).group(1) + '''"
        }'''
    if Prefs['search_type'] == 'name-id':
        variables = '''{
            "id": "''' + re.search(' ([0-9]*?)$', search_term).group(1) + '''"
        }'''
    if Prefs['search_type'] == 'id':
        variables = '''{
            "id": "''' + re.match('^([0-9]*?)$', search_term).group(1) + '''"
        }'''
    if Prefs['search_type'] == 'name':
        variables = '''{
            "search": "''' + search_term + '''",
            "perPage": 6
        }'''
    if Prefs['search_type'] == 'any':
        if search_term.isdigit():
            variables = '''{
                "id": "''' + search_term + '''"
            }'''
        else:
            variables = '''{
                "search": "''' + search_term + '''",
                "perPage": 6,
                "seasonYear": ''' + str(search_year) + '''
            }'''
    try:
        request = requests_retry_session().post('https://graphql.anilist.co',
                                                data={
                                                    'query': query,
                                                    'variables': variables
                                                },
                                                verify=certifi.where())
    except:
        Log.Error('Error searching AniList - Anime: ' + query)
        return
    s = 100
    for result in JSON.ObjectFromString(
            request.content)['data']['anime']['results']:
        results.Append(
            MetadataSearchResult(id=str(result['id']),
                                 name=result['title']['romaji'],
                                 year=result['startDate']['year'],
                                 score=s,
                                 lang=lang))
        s -= 1
    return
def populate_reviews(period):
    """
    Download all new reviews made available on PubMed in the last <period>
    days and save them to the db if they have trials in CrossRef or Cochrane.
    """
    base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    r = utils.requests_retry_session().get(
        base_url,
        params={
            'db': 'pubmed',
            'term': 'systematic review[ti] OR meta analysis[ti] OR cochrane database of systematic reviews[ta]',
            'format': 'json',
            'retmax': 300000,
            'email': crud.eutils_email,
            'tool': crud.eutils_tool,
            'api_key': eutils_key,
            'date_type': 'edat',
            'mindate': (datetime.now().date() -
                        timedelta(days=period)).strftime('%Y/%m/%d'),
            'maxdate': '3000'
        })
    json_data = r.json()
    pmids = json_data['esearchresult']['idlist']
    print(len(pmids))
    segments = utils.chunks(pmids, 100)
    ec = Client(api_key=eutils_key)
    for s in segments:
        while True:
            try:
                articles = ec.efetch(db='pubmed', id=s)
                break
            except (eutils.exceptions.EutilsNCBIError,
                    eutils.exceptions.EutilsRequestError,
                    requests.exceptions.SSLError,
                    requests.exceptions.ConnectionError) as e:
                print(e)
                time.sleep(5)
        a_iter = iter(articles)
        while True:
            try:
                # iterator.next() is Python 2 only; next() works everywhere
                article = next(a_iter)
            except StopIteration:
                break
            print('-----------------' + article.pmid + '-------------------------')
            if article.doi is not None:
                ids = bot.check_trialpubs_nctids(article.pmid, article.doi)
            else:
                ids = bot.check_trialpubs_nctids(article.pmid)
            if ids:
                if ids.pmids:
                    print(ids.pmids)
                    count = crud.articles_with_nctids(tuple(x for x in ids.pmids))
                    print(count)
                    if count and len(count) > 0:
                        print('articles with links = ' + str(len(count)))
                        print('inserting ' + str(article.pmid))
                        crud.pubmedarticle_to_db(article, 'systematic_reviews')
                        for trialpub in count:
                            crud.review_publication(article.pmid, trialpub, 9)
                            linked_ncts = crud.linked_nctids(trialpub)
                            for nct in linked_ncts:
                                crud.review_trial(article.pmid, nct, False,
                                                  'included', user_id=9,
                                                  nickname='crossrefbot')
                if ids.nctids:
                    crud.pubmedarticle_to_db(article, 'systematic_reviews')
                    print('nct ids in crossref = ' + str(len(ids.nctids)))
                    for nct_id in ids.nctids:
                        crud.review_trial(article.pmid, nct_id, False,
                                          'included', 'crossrefbot', 9)
                if not ids.nctids and not ids.pmids:
                    print('found nothing')
            else:
                print('nothing')
            if 'Cochrane' in article.jrnl:
                print('Cochrane')
                crud.pubmedarticle_to_db(article, 'systematic_reviews')
                bot.cochranebot(article.doi, article.pmid)
                bot.cochrane_ongoing_excluded(article.doi, article.pmid)
                conn = dblib.create_con(VERBOSE=True)
                cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
                cur.execute(
                    "select rt.review_id, json_agg(distinct v.user_id) as users from review_rtrial rt"
                    " inner join votes v on rt.id = v.link_id where rt.review_id = %s group by"
                    " rt.review_id;", (article.pmid, ))
                new_users = cur.fetchone()
                if not new_users:
                    new_users = {'users': []}
                if not {17, 9} & set(new_users['users']):
                    print('deleting ' + str(new_users['users']), article.pmid)
                    cur.execute(
                        "delete from votes where link_id in (select id from review_rtrial where review_id = %s);",
                        (article.pmid, ))
                    conn.commit()
                    cur.execute(
                        "delete from review_trialpubs where review_id = %s;",
                        (article.pmid, ))
                    conn.commit()
                    cur.execute(
                        "delete from review_rtrial where review_id = %s;",
                        (article.pmid, ))
                    conn.commit()
                    cur.execute(
                        "delete from systematic_reviews where review_id = %s;",
                        (article.pmid, ))
                    conn.commit()
                conn.close()
            else:
                print('not cochrane')
def update_anime(type, metadata, media, force):
    resp = get_anime(metadata.id)
    Log.Debug("Anilist Response: {}".format(resp))
    result = JSON.ObjectFromString(resp)
    anime = result['data']['Page']['media'][0]
    has_mal_page = True

    # Get episode data
    if Prefs['episode_support']:
        try:
            mal_episodes = get_episodes(str(anime['idMal']))
        except:
            has_mal_page = False
            Log.Error('Error: Show has no episode data: ' + metadata.id)
    else:
        has_mal_page = False

    # Genres
    metadata.genres.clear()
    try:
        # str() around seasonYear: AniList returns it as an int, and
        # concatenating str + int would raise TypeError
        metadata.genres = (anime['genres'] + anime['tags'] +
                           [anime['format']] + [anime['status']] +
                           [anime['season'] + ' ' + str(anime['seasonYear'])])
    except:
        Log.Error('Error: Show has no genres: ' + metadata.id)

    # Rating
    try:
        metadata.rating = float(anime['averageScore']) / 10
    except:
        Log.Error('Error: Show has no rating: ' + metadata.id)

    # Title
    title_language = Prefs['title_language']
    for language in anime['title']:
        if language == title_language:
            metadata.title = anime['title'][language]
            break
        metadata.title = anime['title'][language]

    # Posters
    try:
        poster = Proxy.Media(requests_retry_session().get(
            anime['coverImage']['extraLarge'],
            verify=certifi.where()).content)
        # //save file
        metadata.posters.validate_keys([])
        metadata.posters[anime['coverImage']['extraLarge']] = poster
    except:
        Log.Error('Error: Show has no posters: ' + metadata.id)

    # Summary
    try:
        cleanr = re.compile('<.*?>')
        metadata.summary = re.sub(cleanr, '', anime['description'])
    except:
        Log.Error('Error: Show has no summary: ' + metadata.id)

    # Country
    try:
        if anime['countryOfOrigin'] == 'JP':
            metadata.countries = ['Japan']
        elif anime['countryOfOrigin'] == 'CN':
            metadata.countries = ['China']
        elif anime['countryOfOrigin'] == 'KR':
            metadata.countries = ['Korea']
        else:
            metadata.countries = ['Unknown, please report']
    except:
        Log.Error('Error: Show has no country of origin: ' + metadata.id)

    # Start Date
    try:
        metadata.originally_available_at = datetime(anime['startDate']['year'],
                                                    anime['startDate']['month'],
                                                    anime['startDate']['day'])
    except:
        Log.Error('Error: Show has no start date: ' + metadata.id)

    # Studio
    try:
        metadata.studio = anime['studios']['edges'][0]['node']['name']
    except:
        Log.Error('Error: Show has no studio: ' + metadata.id)

    metadata.roles.clear()

    # Characters
    # Log.Error(anime['characters']['edges'])
    try:
        for character in anime['characters']['edges']:
            # Create new role
            role = metadata.roles.new()
            # Get correct VA
            for VA in character['voiceActors']:
                # Set VA Name
                try:
                    role.name = VA['name']['full']
                except:
                    pass
                # Set VA Photo
                try:
                    role.photo = VA['image']['large']
                except:
                    pass
                # Set Character Name
                try:
                    role.role = character['node']['name']['full']
                except:
                    pass
    except:
        Log.Error('Error: Show has no Characters: ' + metadata.id)

    # Staff
    try:
        for staff in anime['staff']['edges']:
            # Create new role
            role = metadata.roles.new()
            # Set Staff Name
            try:
                role.name = staff['node']['name']['full']
            except:
                pass
            # Set Staff Photo
            try:
                role.photo = staff['node']['image']['large']
            except:
                pass
            # Set Staff Role
            try:
                role.role = staff['role']
            except:
                pass
    except:
        Log.Error('Error: Show has no staff: ' + metadata.id)

    # TV Specific
    if type == 'tv':
        # Banners
        try:
            banner_hash = base64.b64encode(str(anime['bannerImage']))
            banner = Proxy.Media(requests_retry_session().get(
                anime['bannerImage'],
                verify=certifi.where()).content)
        except:
            Log.Error('Error: Show has no banners: ' + metadata.id)
        metadata.banners[banner_hash] = banner
        metadata.art[banner_hash] = banner

    # Movie Specific
    if type == 'movie':
        try:
            banner_hash = base64.b64encode(str(anime['bannerImage']))
            banner = Proxy.Media(
                requests_retry_session().get(
                    anime['bannerImage'],
                    verify=certifi.where()).content)
            metadata.art[banner_hash] = banner
        except:
            Log.Error('Error: Show has no banners: ' + metadata.id)

        # Year
        try:
            metadata.year = anime['startDate']['year']
        except:
            Log.Error('Error: Show has no start date: ' + metadata.id)

        # Roles
        # Staff
        try:
            for staff in anime['staff']['edges']:
                # Director
                try:
                    if staff['role'] == 'Director':
                        director = metadata.directors.new()
                        director.name = staff['node']['name']['full']
                except:
                    pass
        except:
            Log.Error('Error: Show has no staff: ' + metadata.id)

    # Episodes
    if Prefs['episode_support'] and has_mal_page and 1 in media.seasons:
        update_episodes(media, metadata, force, mal_episodes)