Example #1
 def register_requests(self):
     while self.results.qsize() != self.n_jobs:  # TODO: change this condition into an event
         job = self.jobs.get()
         worker = self.workers.get()
         try:
             log.info('assigning job {} to worker {}'.format(
                 job.id, worker))
             with open(job.file_path, 'rb') as data_file:
                 requests_retry_session(RETRY_NUM).post(
                     url=worker,
                     data=data_file,
                     headers={'job_id': job.id, 'start_with': job.start_with})
             with lock:
                 self.in_flights[job.id] = JobAssignment(
                     job, worker, time.time())
                 self.worker_statuses[worker] = 0
         except Exception as e:
             log.warning(
                 'worker {} did not reply to job {}, error msg is {}'.format(
                     worker, job.id, e))
             self.jobs.put(job)
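Every example on this page relies on a requests_retry_session helper defined in its own repository. As a point of reference only, here is a minimal sketch of such a helper, assuming the common requests + urllib3 Retry pattern (the exact signature in each project may differ):

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry


def requests_retry_session(retries=3, backoff_factor=0.3,
                           status_forcelist=(500, 502, 504), session=None):
    # Reuse the provided session or create a new one, then mount an adapter
    # that retries failed connects/reads and the listed status codes with
    # exponential backoff.
    session = session or requests.Session()
    retry = Retry(total=retries, read=retries, connect=retries,
                  backoff_factor=backoff_factor,
                  status_forcelist=status_forcelist)
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session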
Example #2
 def heartbeats(self):
     while self.results.qsize() != self.n_jobs:
         with lock:
             for worker, _ in self.worker_statuses.items():
                 try:
                     requests_retry_session(
                         retries=RETRY_NUM).get(worker + '/heartbeat')
                     self.worker_statuses[worker] = 0
                 except Exception as e:
                     log.warning(
                         'worker {} does not respond to heartbeat message, err msg is {}'
                         .format(worker, e))
                     self.worker_statuses[worker] += 1
         time.sleep(3)
Example #3
def advanced_search(search_str):
    """ search pubmed for a review or meta-analysis containing search_str in the title or abstract & return matching
    PMIDs """
    base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    r = utils.requests_retry_session().get(
        base_url,
        params={
            'db': 'pubmed',
            'term': '(systematic review[ti] OR meta analysis[ti]) AND ' + search_str + '[tiab]',
            'format': 'json',
            'retmax': 300000,
            'email': eutils_email,
            'tool': eutils_tool,
            'api_key': eutils_key
        })
    json = r.json()
    pmids = json['esearchresult']['idlist']
    if pmids:
        matches = crud.get_reviews_with_ids(pmids)
        print matches
        return matches if matches else None
    return None
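A hedged usage sketch for the function above (the search term is hypothetical):

matches = advanced_search('asthma')
if matches:
    for review in matches:
        print(review)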
Example #4
def download_file(url, save_dir):
    req = utils.requests_retry_session().get(url, stream=True)
    if req.status_code == 200:
        if 'content-disposition' in req.headers:
            content_disposition = req.headers['content-disposition']
            filename_pattern = 'filename="(.*?)"'
            filename = re.findall(filename_pattern, content_disposition)[0]
            filename = filename.replace('/', '_')
        else:
            content_type = req.headers['content-type']
            filename, ext = content_type.rsplit('/', 1)
            filename = f'{filename.replace("/", "_")}.{ext}'
        save_path = Path(save_dir) / filename
        idx = 1
        stem, suffix = save_path.stem, save_path.suffix
        while save_path.exists():
            idx += 1
            save_path = save_path.parent / f'{stem}{idx}{suffix}'
        print(f'Will save to: {save_path}')

        with open(save_path, 'wb') as f:
            for chunk in req.iter_content(1024):
                f.write(chunk)
        print(f'Download completed: {save_path}')
        if filename.endswith('zip'):
            print(f'Unzipping {save_path}')
            with zipfile.ZipFile(save_path) as zf:
                zf.extractall(save_dir)
            print('Completed; deleting the zip file.')
            save_path.unlink()
    else:
        print(f'Got {req.status_code} while downloading file.')
    return req.status_code
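A hedged usage sketch (the URL and target directory below are made up for illustration):

status = download_file('https://example.com/archive.zip', 'downloads')
if status != 200:
    print(f'Download failed with HTTP status {status}')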
Example #5
 def __init__(self, base_url=None, auth=None, fhir_version=FHIR_VERSION,
              status_endpoint=None):
     self.logger = logging.getLogger(type(self).__name__)
     self.base_url = base_url
     self.status_endpoint = status_endpoint
     self.auth = auth
     self.fhir_version = fhir_version
     self.session = requests_retry_session()
Example #6
    def __init__(self, client_token):

        self.client_token = client_token
        self.session = requests_retry_session()
        self.headers = None
        self.accounts = None

        self.get_access_token()
        self.get_accounts()
Example #7
def get_page(user_id, page_token=None):
    params = {'key': config.API_KEY}
    if page_token:
        params['pageToken'] = page_token
    api_url = config.API_URL.format(user_id=user_id)
    req = utils.requests_retry_session().get(api_url, params=params)
    json_data = req.json()
    next_page_token = json_data.get('nextPageToken', None)
    return json_data['items'], next_page_token
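One possible way to drain every page with this helper; the get_all_items wrapper is a hypothetical sketch that simply follows nextPageToken until it runs out:

def get_all_items(user_id):
    # Keep requesting pages until the API stops returning a nextPageToken.
    items, token = get_page(user_id)
    while token:
        page_items, token = get_page(user_id, page_token=token)
        items.extend(page_items)
    return items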
Example #8
 def send_result_to_master(self):
     while not to_stop_server.is_set():
         result = self.results.get()
         try:
             log.info('sending back {}'.format(result.job_id))
             log.debug('result content is {}'.format(result.content))
             requests_retry_session(RETRY_NUM).post(
                 MASTER_URL,
                 data=json.dumps({
                     'worker': 'http://{}:{}'.format(local_ip, PORT_NUM),
                     'job_id': result.job_id,
                     'content': result.content,
                 }))
         except Exception as e:
             log.error(
                 'Cannot send result {} to master {}. Error msg is {}'.format(
                     result, MASTER_URL, e))
Example #9
def send_request(metrics, size):
    if size == 0:
        return None
    query = 'http://localhost:4242/api/query?start={}'.format(START_TIME)
    counter = 0
    for metric in metrics:
        if counter < size:
            query += '&m=zimsum:0all-count-nan:{}'.format(metric)
            counter += 1
        else:
            break
    return requests_retry_session(retries=5).get(query)
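A hedged usage sketch (the metric names are made up; only the first size metrics are appended to the OpenTSDB query):

resp = send_request(['sys.cpu.user', 'sys.cpu.system', 'sys.mem.free'], size=2)
if resp is not None and resp.status_code == 200:
    print(resp.json())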
Example #10
def download_media(album_url, save_dir):
    print(f'Connecting to {album_url}')
    if 'youtu' in album_url:
        print('Trying to download from YouTube using you-get')
        subprocess.run(['you-get', album_url, '-o', str(save_dir)])
        return 999
    album_req = utils.requests_retry_session().get(album_url)
    if album_req.status_code == 200:
        pattern = r'(http.*?video-downloads\.googleusercontent\.com.*?)"'
        download_url = re.findall(pattern, album_req.text)[0]
        print(f'Extracted download URL: {download_url}')
        Path(save_dir).mkdir(parents=True, exist_ok=True)
        status_code = download_file(download_url, save_dir)

        if status_code != 200:
            print('Got failure; going into fallback mode (dirty)')
            try:
                parsed = lxml.html.document_fromstring(album_req.text)
                script_tags = parsed.cssselect('script')
                json_data = None
                for script_tag in script_tags:
                    if (script_tag.text and 'AF_initDataCallback' in script_tag.text
                            and "key: 'ds:0'" in script_tag.text):
                        json_data_str = re.findall('return (.*?)}}',
                                                   script_tag.text,
                                                   flags=re.S)[0]
                        json_data = json.loads(json_data_str)
                        break
                if json_data is None:
                    return 404
                url_tups = list(json_data[-1][-1][-1][-1].values())[-1][-1]
                max_res = 0
                url = None
                for url_tup in url_tups:
                    res = url_tup[1] * url_tup[2]
                    if res > max_res:
                        max_res = res
                        url = url_tup[3]
                print(f'Extracted dirty url {url}')
                new_status_code = download_file(url, save_dir)
                if new_status_code == 200:
                    return 888
                return new_status_code
            except AttributeError:
                # Possibly due to broken video. Just return 777,
                # and let's download the thumbnail only...
                return 777
    else:
        print(f'Got {album_req.status_code} while extracting URL.')
        status_code = album_req.status_code
    return status_code
Example #11
def parse(idx, url, retries=None, backoff=None, timeout=None, session=None):

    try:
        session = requests_retry_session(retries,
                                         backoff,
                                         status_forcelist=status_forcelist,
                                         session=session)
        response = session.get(url.format(idx), timeout=timeout, verify=False)
        jsdata = Box(json.loads(response.text))
        if not jsdata.success:
            raise ParseError('Status success is not true')

        return jsdata.obj

    except (ConnectionError, HTTPError):
        raise NetworkError(f'Failed to process {idx}')
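A hedged usage sketch (the URL template and index are hypothetical; parse formats the index into the template and returns the obj field of the JSON payload):

item = parse(42, 'https://api.example.com/items/{}', retries=3, backoff=0.3, timeout=10)
print(item)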
Example #12
    def download(self, image, filename=None):
        """

        :param str image:
        :param str filename:
        :return:
        """
        if filename is None:
            filename = os.path.basename(image)
        local = self.path + '/' + filename
        if os.path.exists(local):
            print('Skipping ' + filename)
            return
        if image.startswith('//'):
            image = "http:" + image
        resp = utils.requests_retry_session().get(image)
        with open(local, 'wb') as f:
            f.write(resp.content)
Example #13
def pubmed_convert_id(known_id, desired_id):
    """
    convert from PMID to DOI or vice versa
    @param known_id: ID we have
    @param desired_id: type of ID we desire (PMID or DOI)
    @return: desired ID or None
    """
    base_url = "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/"
    r = utils.requests_retry_session().get(base_url,
                                           params={'ids': known_id, 'format': 'json', 'email': eutils_email,
                                                   'tool': eutils_tool, 'api_key': eutils_key})
    if r.status_code == 200:
        r = json.loads(r.text)
        if desired_id in r['records'][0]:
            return r['records'][0][desired_id]
        else:
            return None
    else:
        return None
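A hedged usage sketch (the PMID is made up; the desired ID type follows the docstring, e.g. 'doi' or 'pmid'):

doi = pubmed_convert_id('23193287', 'doi')
if doi:
    print(doi)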
Example #14
    def __init__(self, srv, show_id):
        """

        :param PageDownloader srv:
        :param show_id:
        """
        threading.Thread.__init__(self)
        self.service = srv
        self.id = show_id

    def run(self):
        self.service.download(self.id)


if __name__ == '__main__':
    service = PageDownloader(utils.requests_retry_session(), 'nyato', 'https://www.nyato.com/manzhan/{}/')
    start = int(input('Start ID:'))
    end = int(input('End ID:'))
    if start > end or start <= 0:
        print('Start - End is invalid')
        exit(1)
    chunks = utils.make_chunks(range(start, end + 1), 10)  # include the end ID
    for chunk in chunks:
        pool = []
        for show_id in chunk:
            thread = CollectThread(service, show_id)
            thread.start()
            pool.append(thread)
        for thread in pool:
            thread.join()
Example #15
    def __init__(self, srv, show_id):
        """

        :param PageDownloader srv:
        :param show_id:
        """
        threading.Thread.__init__(self)
        self.service = srv
        self.id = show_id

    def run(self):
        self.service.download(self.id)


if __name__ == '__main__':
    service = PageDownloader(utils.requests_retry_session(),
                             int(input('Event ID:')))
    start = int(input('Start ID:'))
    end = int(input('End ID:'))
    if start > end or start <= 0:
        print('Start - End is invalid')
        exit(1)
    chunks = utils.make_chunks(range(start, end + 1), 10)
    for chunk in chunks:
        pool = []
        for show_id in chunk:
            thread = CollectThread(service, show_id)
            thread.start()
            pool.append(thread)
        for thread in pool:
            thread.join()
Example #16
            print('{}: {}'.format(show_id, response.content))


class CollectThread(threading.Thread):
    def __init__(self, srv, show_id):
        """

        :param ShowInfoDownloader srv:
        :param show_id:
        """
        threading.Thread.__init__(self)
        self.service = srv
        self.id = show_id

    def run(self):
        self.service.download(self.id)


if __name__ == '__main__':
    service = ShowInfoDownloader(
        utils.requests_retry_session(), 'json',
        'https://show.bilibili.com/api/ticket/project/get?version=133&id={}')
    start = int(input('Start ID:'))
    end = int(input('End ID:'))
    if start > end or start <= 0:
        print('Start - End is invalid')
        exit(1)
    while start <= end:
        service.download(start)
        start += 1
Example #17
def main():
    start_with = sys.argv[1] if len(sys.argv) > 1 else ""
    workers = config.workers_url()
    start_time = time.time()
    log.info('master starts, start_with is "{}", workers are {}'.format(
        start_with, workers))
    with tempdir() as tmp_dir:
        all_metrics_file_name = 'all_metric_names.tmp'
        with open('{}/{}'.format(tmp_dir, all_metrics_file_name),
                  'w') as all_metrics_file:
            get_all_metrics_into_file(start_with, all_metrics_file)
        parts = split_file_into_parts(file_path=all_metrics_file.name,
                                      num_parts=len(workers) * WORKER_JOB_NUM,
                                      output_dir=tmp_dir)
        log.info('partition finishes, all jobs are: {}'.format(parts))
        jobs = [Job(start_with, part) for part in parts]

        # master preparation
        master = MasterServer(workers, jobs)

        # setup threads
        listening = Thread(target=master.server.serve_forever)
        listening.daemon = True
        sending = Thread(target=master.register_requests)
        sending.daemon = True
        checking = Thread(target=master.scan_requests)
        checking.daemon = True
        heartbeat = Thread(target=master.heartbeats)
        heartbeat.daemon = True
        listening.start()
        log.info(
            'master server starts up, listening on port {}'.format(PORT_NO))
        sending.start()
        checking.start()
        heartbeat.start()

        # waiting for results from workers
        results = []
        while len(results) < len(jobs):
            try:
                result = master.results.get(timeout=MAX_JOB_INTERVAL)
            except Empty:
                log.error('master waited too long for result, shutting down')
                exit(1)
            results.append(result)

        # all work done, shutdown servers
        for worker in workers:
            try:
                log.info('sending shutdown to worker {}'.format(worker))
                requests_retry_session(RETRY_NUM).get(worker + '/shutdown')
            except Exception as e:
                log.error(
                    'unable to stop worker {}, error message is {}'.format(
                        worker, e))
        master.server.shutdown()
        master.server.socket.close()
        log.info('master server shutdown, beginning aggregation')

        # start reducing phase
        merged, to_expands, total_number = reducer.start(results, start_with)
        put_to_tsdb(start_with, merged, to_expands)
        log.info(
            'one round master aggregation finished, to_expands are {}'.format(
                to_expands))
        expand(to_expands, tmp_dir,
               '{}/{}'.format(tmp_dir, all_metrics_file_name), total_number)
        log.info('finished! total running time is {}'.format(time.time() -
                                                             start_time))
Example #18
    def login(self, user, passwd, retries=15):
        """
            Tries to login to stackoverflow and returns the requests session
            ++ This function logs with logging.getLogger(__name__) ++

            user: email or user
            passwd: password
            retries: max number of tries to login
        """
        logger = logging.getLogger(__name__)
        logger.info(f'Trying to login with max of {retries} retries')
        
        url='https://stackoverflow.com/users/login?ssrc=head&returnurl=https://stackoverflow.com/'
        #host='stackoverflow.com'
        #referer='https://stackoverflow.com/'
        backoff_factor=3

        self.s=requests_retry_session(retries=retries, backoff_factor=backoff_factor, session=self.s)
    
        r=self.s.get(url)
        # logger.debug(f'GetRequest({url}) -> Response==[{r.text}]==')

        soup=BeautifulSoup(r.text, 'html.parser')
        form=soup.select('form[id="login-form"]')
        form=form[0] if len(form)>0 else None

        if form:
            actionurl=urljoin('https://stackoverflow.com', form.attrs['action'])
        else:
            actionurl=url
        
        hinputs=form.select('input[type="hidden"]') if form else []
        post_data={i.attrs['name']:i.attrs['value'] for i in hinputs 
                        if all( (i.attrs.get('name',False), i.attrs.get('value',False)) )}
        logger.debug(f'form[{form.attrs if form is not None else form}]; actionurl[{actionurl}]; post_data[{post_data}]')
        post_data.update({'email':user, 'password':passwd})

        self.s=requests_retry_session(retries=retries, backoff_factor=backoff_factor, session=self.s)
        while True:
            try:
                r=self.s.post(actionurl, data=post_data, allow_redirects=False)
                
                if r.is_redirect:
                    logger.debug(f'Got redirected to {r.headers["Location"]}')
                else:
                    logger.debug(f'No redirect. Headers[{r.headers}]')

                if r.cookies.get('uauth', False):
                    #log ok
                    logger.info('Logged in successfully. Notifying...')
                    logger.debug(f'Cookies[{r.cookies}]')
                    IFTTnotify(self.event_name, self.key, ('[StackOBot]', 'Login ok'))
                    return True
                elif r.url.startswith('https://stackoverflow.com/nocaptcha'):
                    logger.warning('Got a CAPTCHA. Trying in a minute. Notifying...')
                    IFTTnotify(self.event_name, self.key, ('[StackOBot]', 'Got captcha'))
                    sleep(60)
                    continue
                else:
                    logger.error(f"Didn't get uauth cookie, instead got [{r.cookies}]")
                    logger.debug(f'GetRequest({actionurl}) -> Response==[{b64encode(r.content)}]==')
                    raise RuntimeError

            except Exception as e:
                #log CRITICAL
                logger.critical("Couldn't log in. Notifying...")
                IFTTnotify(self.event_name, self.key, ('[StackOBot]', 'Login FAILED'))
                logger.exception(e)
                return False
Example #19
from utils import write_to_db
from utils import write_to_db_update
from utils import delete_rows
from utils import requests_retry_session  # assumed to live in utils like the other helpers
import pandas as pd
import janitor

if __name__ == '__main__':
    # get trade data for today
    today = pd.Timestamp.today().strftime('%Y-%m-%d')
    formData = {
        "selDatum": today,
        "btnSave": "Show",
        "IsItPregledPovijest": "yes",
        "rbScope": "svi"
    }
    response = requests_retry_session().post(
        'https://zse.hr/default.aspx?id=26523', data=formData)
    tblByDate = pd.read_html(response.text,
                             attrs={'id': 'dnevna_trgovanja'},
                             thousands=".",
                             decimal=',')[0]
    if len(tblByDate.index) == 0:
        print("There are no trade data for ZSE.")
    else:
        # clean table
        tblByDate = janitor.clean_names(tblByDate)
        tblByDate = tblByDate.rename(columns={
            'ticker': 'symbol',
            'change_%': 'change'
        })
        tblByDate['change'] = tblByDate['change'].str.extract(r'(.\d+,\d+)')
        tblByDate['change'] = tblByDate['change'].str.replace(',', '.')
Example #20
import requests
import os
import json
import time

from utils import get_valid_filename, requests_retry_session

api_base = 'https://mobileapi.jumbo.com/v5'
jumbo_products_path = 'products/jumbo'

session = requests_retry_session()


def fetch_product(product_id):
    url = '{}/products/{}'.format(api_base, product_id)

    r = session.get(url)
    r.raise_for_status()

    return r.json()['product']['data']


def fetch_products(size=100):
    offset = 0

    products = []
    while True:
        url = '{}/products?count={}&offset={}'.format(api_base, size, offset)
        try:
            r = session.get(url)
            r.raise_for_status()
Example #21
def update_anime(type, metadata, media, force):
    result = JSON.ObjectFromString(get_anime(metadata.id))
    anime = result['data']['Page']['media'][0]
    has_mal_page = True
    #Get episode data
    if Prefs['episode_support']:
        try:
            mal_episodes = get_episodes(str(anime['idMal']))
        except:
            has_mal_page = False
            Log.Error('Error: Show has no episode data: ' + metadata.id)
    else:
        has_mal_page = False

    # Genres
    if metadata.genres is None or force:
        metadata.genres.clear()
        try:
            metadata.genres = anime['genres']
        except:
            Log.Error('Error: Show has no genres: ' + metadata.id)

    # Rating
    if metadata.rating is None or force:
        try:
            metadata.rating = float(anime['averageScore']) / 10
        except:
            Log.Error('Error: Show has no rating: ' + metadata.id)

    # Title
    if metadata.title is None or force:
        title_language = Prefs['title_language']
        for language in anime['title']:
            if language == title_language:
                metadata.title = anime['title'][language]
                break
            metadata.title = anime['title'][language]

    # Posters
    if metadata.posters is None or force:
        try:
            poster = Proxy.Media(
                requests_retry_session().get(anime['coverImage']['extraLarge'],
                                             verify=certifi.where()).content)
            metadata.posters[anime['coverImage']['extraLarge']] = poster
        except:
            Log.Error('Error: Show has no posters: ' + metadata.id)

    # Summary
    if metadata.summary is None or force:
        try:
            h = HTMLParser()
            cleanr = re.compile('<.*?>')
            metadata.summary = re.sub(cleanr, '',
                                      h.unescape(anime['description']))
        except:
            Log.Error('Error: Show has no summary: ' + metadata.id)

    # Country
    if metadata.countries is None or force:
        try:
            if anime['countryOfOrigin'] == 'JP':
                metadata.countries = ['Japan']
            elif anime['countryOfOrigin'] == 'CN':
                metadata.countries = ['China']
            elif anime['countryOfOrigin'] == 'KR':
                metadata.countries = ['Korea']
            else:
                metadata.countries = ['Unknown, please report']
        except:
            Log.Error('Error: Show has no country of origin: ' + metadata.id)

    # Start Date
    if metadata.originally_available_at is None or force:
        try:
            metadata.originally_available_at = datetime(
                anime['startDate']['year'], anime['startDate']['month'],
                anime['startDate']['day'])
        except:
            Log.Error('Error: Show has no start date: ' + metadata.id)

    # Studio
    if metadata.studio is None or force:
        try:
            metadata.studio = anime['studios']['edges'][0]['node']['name']
        except:
            Log.Error('Error: Show has no studio: ' + metadata.id)

    if metadata.roles is None or force:
        metadata.roles.clear()

    # Characters
    if metadata.roles is None or force:
        try:
            for character in anime['characters']['edges']:
                # Create new role
                role = metadata.roles.new()

                # Get correct VA
                for VA in character['voiceActors']:

                    # Set VA Name
                    try:
                        role.name = VA['name']['full']
                    except:
                        pass

                    # Set VA Photo
                    try:
                        role.photo = VA['image']['large']
                    except:
                        pass

                    # Set Character Name
                    try:
                        role.role = character['node']['name']['full']
                    except:
                        pass
        except:
            Log.Error('Error: Show has no Characters: ' + metadata.id)

    # Staff
    if metadata.roles is None or force:
        try:
            for staff in anime['staff']['edges']:

                # Create new role
                role = metadata.roles.new()

                # Set Staff Name
                try:
                    role.name = staff['node']['name']['full']
                except:
                    pass

                # Set Staff Photo
                try:
                    role.photo = staff['node']['image']['large']
                except:
                    pass

                # Set Staff Role
                try:
                    role.role = staff['role']
                except:
                    pass
        except:
            Log.Error('Error: Show has no staff: ' + metadata.id)

    #Reviews
    if metadata.reviews is None or force:
        metadata.reviews.clear()
        try:
            for review in anime['reviews']['edges']:

                # Create new review
                r = metadata.reviews.new()

                # Set Review Author
                try:
                    r.author = review['node']['user']['name']
                except:
                    pass

                # Set Review Source
                r.source = 'AniList'

                # Set Review Image
                try:
                    if int(review['node']['score']) >= 50:
                        r.image = 'rottentomatoes://image.review.fresh'
                    else:
                        r.image = 'rottentomatoes://image.review.rotten'
                except:
                    pass

                # Set Review Link
                try:
                    r.link = review['node']['siteUrl']
                except:
                    pass

                # Set Review Text
                try:
                    r.text = review['node']['summary']
                except:
                    pass
        except:
            Log.Error('Error: Show has no reviews: ' + metadata.id)

    # TV Specific
    if type == 'tv':

        # Banners
        if metadata.banners is None or metadata.art is None or force:
            try:
                banner_hash = base64.b64encode(str(anime['bannerImage']))
                banner = Proxy.Media(requests_retry_session().get(
                    anime['bannerImage'], verify=certifi.where()).content)
                if metadata.banners is None or force:
                    metadata.banners[banner_hash] = banner
                if metadata.art is None or force:
                    metadata.art[banner_hash] = banner
            except:
                Log.Error('Error: Show has no banners: ' + metadata.id)

        # Episodes
        if Prefs['episode_support'] and has_mal_page and 1 in media.seasons:
            update_episodes(media, metadata, force, mal_episodes)

    # Movie Specific
    if type == 'movie':

        if metadata.art is None or force:
            try:
                banner_hash = base64.b64encode(str(anime['bannerImage']))
                banner = Proxy.Media(requests_retry_session().get(
                    anime['bannerImage'], verify=certifi.where()).content)
                metadata.art[banner_hash] = banner
            except:
                Log.Error('Error: Show has no banners: ' + metadata.id)

        # Year
        if metadata.year is None or force:
            try:
                metadata.year = anime['startDate']['year']
            except:
                Log.Error('Error: Show has no start date: ' + metadata.id)

        # Roles
        if metadata.roles is None or force:

            # Staff
            try:
                for staff in anime['staff']['edges']:

                    # Director
                    try:
                        if staff['role'] == 'Director':
                            director = metadata.directors.new()
                            director.name = staff['node']['name']['full']
                    except:
                        pass
            except:
                Log.Error('Error: Show has no staff: ' + metadata.id)
Example #22
def search_anime(type, results, media, lang):
    search_term = media.show if type == 'tv' else media.name
    search_year = media.year

    if media.year is None:
        search_year = 'null'

    query = '''
    query ($id: Int, $perPage: Int, $search: String, $seasonYear: Int) {
        anime: Page(perPage: $perPage) {
            pageInfo {
                total
            }
            results: media(type: ANIME, id: $id, search: $search seasonYear: $seasonYear) {
                id
                title {
                    romaji
                }
                startDate {
                    year
                }
            }
        }
    }
    '''

    if Prefs['search_type'] == 'id-name':
        variables = '''{
            "id": "''' + re.match('^([0-9]*?) ', search_term).group(1) + '''"
        }'''
    if Prefs['search_type'] == 'name-id':
        variables = '''{
            "id": "''' + re.search(' ([0-9]*?)$', search_term).group(1) + '''"
        }'''
    if Prefs['search_type'] == 'id':
        variables = '''{
            "id": "''' + re.match('^([0-9]*?)$', search_term).group(1) + '''"
        }'''
    if Prefs['search_type'] == 'name':
        variables = '''{
            "search": "''' + search_term + '''",
            "perPage": 6
        }'''
    if Prefs['search_type'] == 'any':
        if search_term.isdigit():
            variables = '''{
                "id": "''' + search_term + '''"
            }'''
        else:
            variables = '''{
                "search": "''' + search_term + '''",
                "perPage": 6,
                "seasonYear": ''' + str(search_year) + '''
            }'''

    try:
        request = requests_retry_session().post('https://graphql.anilist.co',
                                                data={
                                                    'query': query,
                                                    'variables': variables
                                                },
                                                verify=certifi.where())
    except:
        Log.Error('Error searching AniList - Anime: ' + query)
        return

    s = 100
    for result in JSON.ObjectFromString(
            request.content)['data']['anime']['results']:
        results.Append(
            MetadataSearchResult(id=str(result['id']),
                                 name=result['title']['romaji'],
                                 year=result['startDate']['year'],
                                 score=s,
                                 lang=lang))
        s = s - 1
    return
Example #23
def populate_reviews(period):
    """ download all new reviews made available on pubmed in the last <period> # days & save to db if they have trials in
    CrossRef or Cochrane """
    base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    r = utils.requests_retry_session().get(
        base_url,
        params={
            'db': 'pubmed',
            'term': 'systematic review[ti] OR meta analysis[ti] OR cochrane database of systematic reviews[ta]',
            'format': 'json',
            'retmax': 300000,
            'email': crud.eutils_email,
            'tool': crud.eutils_tool,
            'api_key': eutils_key,
            'date_type': 'edat',
            'mindate': (datetime.now().date() - timedelta(days=period)).strftime('%Y/%m/%d'),
            'maxdate': '3000'
        })
    json = r.json()
    pmids = json['esearchresult']['idlist']
    print len(pmids)
    segments = utils.chunks(pmids, 100)
    ec = Client(api_key=eutils_key)
    for s in segments:
        while True:
            try:
                articles = ec.efetch(db='pubmed', id=s)
                break
            except (eutils.exceptions.EutilsNCBIError,
                    eutils.exceptions.EutilsRequestError,
                    requests.exceptions.SSLError,
                    requests.exceptions.ConnectionError) as e:
                print e
                time.sleep(5)
        a_iter = iter(articles)
        while True:
            try:
                article = a_iter.next()
            except StopIteration:
                break
            print '-----------------' + article.pmid + '-------------------------'
            if article.doi is not None:
                ids = bot.check_trialpubs_nctids(article.pmid, article.doi)
            else:
                ids = bot.check_trialpubs_nctids(article.pmid)
            if ids:
                if ids.pmids:
                    print ids.pmids
                    count = crud.articles_with_nctids(
                        tuple(x for x in ids.pmids))
                    print count
                    if count and len(count) > 0:
                        print 'articles with links = ' + str(len(count))
                        print 'inserting ' + str(article.pmid)
                        crud.pubmedarticle_to_db(article, 'systematic_reviews')
                        for trialpub in count:
                            crud.review_publication(article.pmid, trialpub, 9)
                            linked_ncts = crud.linked_nctids(trialpub)
                            for nct in linked_ncts:
                                crud.review_trial(article.pmid,
                                                  nct,
                                                  False,
                                                  'included',
                                                  user_id=9,
                                                  nickname='crossrefbot')
                if ids.nctids:
                    crud.pubmedarticle_to_db(article, 'systematic_reviews')
                    print 'nct ids in crossref = ' + str(len(ids.nctids))
                    for nct_id in ids.nctids:
                        crud.review_trial(article.pmid, nct_id, False,
                                          'included', 'crossrefbot', 9)
                if not ids.nctids and not ids.pmids:
                    print 'found nothing'
            else:
                print 'nothing'
            if 'Cochrane' in article.jrnl:
                print 'Cochrane'
                crud.pubmedarticle_to_db(article, 'systematic_reviews')
                bot.cochranebot(article.doi, article.pmid)
                bot.cochrane_ongoing_excluded(article.doi, article.pmid)
                conn = dblib.create_con(VERBOSE=True)
                cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
                cur.execute(
                    "select rt.review_id, json_agg(distinct v.user_id) as users from review_rtrial rt"
                    " inner join votes v on rt.id = v.link_id where rt.review_id = %s group by"
                    " rt.review_id;", (article.pmid, ))
                new_users = cur.fetchone()
                if not new_users:
                    new_users = {'users': []}
                if not {17, 9} & set(new_users['users']):
                    print 'deleting ' + str(new_users['users']), article.pmid
                    cur.execute(
                        "delete from votes where link_id in (select id from review_rtrial where review_id = %s);",
                        (article.pmid, ))
                    conn.commit()
                    cur.execute(
                        "delete from review_trialpubs where review_id = %s;",
                        (article.pmid, ))
                    conn.commit()
                    cur.execute(
                        "delete from review_rtrial where review_id = %s;",
                        (article.pmid, ))
                    conn.commit()
                    cur.execute(
                        "delete from systematic_reviews where review_id = %s;",
                        (article.pmid, ))
                    conn.commit()
                conn.close()
            else:
                print 'not cochrane'
Example #24
def update_anime(type, metadata, media, force):
    resp = get_anime(metadata.id)
    Log.Debug("Anilist Response: {}".format(resp))
    result = JSON.ObjectFromString(resp)
    anime = result['data']['Page']['media'][0]
    has_mal_page = True
    #Get episode data
    if Prefs['episode_support']:
        try:
            mal_episodes = get_episodes(str(anime['idMal']))
        except:
            has_mal_page = False
            Log.Error('Error: Show has no episode data: ' + metadata.id)
    else: has_mal_page = False

    # Genres
    metadata.genres.clear()
    try:
        metadata.genres = (anime['genres'] + anime['tags'] + [anime['format']] +
                           [anime['status']] +
                           [anime['season'] + ' ' + str(anime['seasonYear'])])
    except:
        Log.Error('Error: Show has no genres: ' + metadata.id)

    # Rating
    try:
        metadata.rating = float(anime['averageScore']) / 10
    except:
        Log.Error('Error: Show has no rating: ' + metadata.id)

    # Title
    title_language = Prefs['title_language']
    for language in anime['title']:
        if language == title_language:
            metadata.title = anime['title'][language]
            break
        metadata.title = anime['title'][language]

    # Posters
    try:
        poster = Proxy.Media(
            requests_retry_session().get(
                anime['coverImage']['extraLarge'],
                verify=certifi.where()
            ).content
        )
        # //save file
        metadata.posters.validate_keys([])
        metadata.posters[anime['coverImage']['extraLarge']] = poster
    except:
        Log.Error('Error: Show has no posters: ' + metadata.id)

    # Summary
    try:
        cleanr = re.compile('<.*?>')
        metadata.summary = re.sub(cleanr, '', anime['description'])
    except:
        Log.Error('Error: Show has no summary: ' + metadata.id)

    # Country
    try:
        if anime['countryOfOrigin'] == 'JP':
            metadata.countries = ['Japan']
        elif anime['countryOfOrigin'] == 'CN':
            metadata.countries = ['China']
        elif anime['countryOfOrigin'] == 'KR':
            metadata.countries = ['Korea']
        else:
            metadata.countries = ['Unknown, please report']
    except:
        Log.Error('Error: Show has no country of origin: ' + metadata.id)

    # Start Date
    try:
        metadata.originally_available_at = datetime(anime['startDate']['year'], anime['startDate']['month'], anime['startDate']['day'])
    except:
        Log.Error('Error: Show has no start date: ' + metadata.id)

    # Studio
    try:
        metadata.studio = anime['studios']['edges'][0]['node']['name']
    except:
        Log.Error('Error: Show has no studio: ' + metadata.id)

    metadata.roles.clear()

    # Characters
        # Log.Error(anime['characters']['edges'])
    try:
        for character in anime['characters']['edges']:
            # Create new role
            role = metadata.roles.new()

            # Get correct VA
            for VA in character['voiceActors']:

                # Set VA Name
                try:
                    role.name = VA['name']['full']
                except:
                    pass

                # Set VA Photo
                try:
                    role.photo = VA['image']['large']
                except:
                    pass

                # Set Character Name
                try:
                    role.role = character['node']['name']['full']
                except:
                    pass
    except:
        Log.Error('Error: Show has no Characters: ' + metadata.id)

    # Staff
    try:
        for staff in anime['staff']['edges']:

            # Create new role
            role = metadata.roles.new()

            # Set Staff Name
            try:
                role.name = staff['node']['name']['full']
            except:
                pass

            # Set Staff Photo
            try:
                role.photo = staff['node']['image']['large']
            except:
                pass

            # Set Staff Role
            try:
                role.role = staff['role']
            except:
                pass
    except:
        Log.Error('Error: Show has no staff: ' + metadata.id)

    # TV Specific
    if type == 'tv':

        # Banners
        try:
            banner_hash = base64.b64encode(str(anime['bannerImage']))
            banner = Proxy.Media(
                requests_retry_session().get(
                    anime['bannerImage'],
                    verify=certifi.where()
                ).content
            )
            metadata.banners[banner_hash] = banner
            metadata.art[banner_hash] = banner
        except:
            Log.Error('Error: Show has no banners: ' + metadata.id)


    # Movie Specific
    if type == 'movie':

        try:
            banner_hash = base64.b64encode(str(anime['bannerImage']))
            banner = Proxy.Media(
                requests_retry_session().get(
                    anime['bannerImage'],
                    verify=certifi.where()
                ).content
            )
            metadata.art[banner_hash] = banner
        except:
            Log.Error('Error: Show has no banners: ' + metadata.id)

        # Year
        try:
            metadata.year = anime['startDate']['year']
        except:
            Log.Error('Error: Show has no start date: ' + metadata.id)

    # Roles

        # Staff
        try:
            for staff in anime['staff']['edges']:

                # Director
                try:
                    if staff['role'] == 'Director':
                        director = metadata.directors.new()
                        director.name = staff['node']['name']['full']
                except:
                    pass
        except:
            Log.Error('Error: Show has no staff: ' + metadata.id)

    # Episodes
    if Prefs['episode_support'] and has_mal_page and 1 in media.seasons:
        update_episodes(media, metadata, force, mal_episodes)