Example #1
def get_data(dc, timeout=120, clear_cache=False):
    print("Retrieving data for datacenter: {0}".format(dc['name']))
    # Configure our cache, create one DB per DC.
    requests_cache.install_cache(dc["name"],
                                 backend='sqlite',
                                 old_data_on_error=True)
    if clear_cache:
        requests_cache.clear()
    url = 'http://{0}:{1}/results'.format(dc['url'], dc['port'])
    data = None
    r = None
    now = time.ctime(int(time.time()))
    try:
        if 'user' in dc and 'password' in dc:
            r = requests.get(url,
                             auth=(dc['user'], dc['password']),
                             timeout=timeout)
        else:
            r = requests.get(url, timeout=timeout)
        print("Time: {0} / Used Cache: {1}".format(now, r.from_cache))
        # print("CACHE USED? {}".format(requests_cache.get_cache()))
        r.raise_for_status()
    except Exception as ex:
        print("Got exception while retrieving data for dc: {0} ex: {1}".format(
            dc, str(ex)))
    finally:
        if r:
            data = r.json()
            r.close()
        else:
            print("Got no data while making API call to {0} ".format(dc))

    print("Data Retrieval for datacenter {0} complete".format(dc['name']))
    return data
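A minimal usage sketch for get_data above, assuming a datacenter dict carrying the keys the function reads ('name', 'url', 'port' and optional 'user'/'password'); all values here are illustrative:

# Hypothetical datacenter record; key names match those read by get_data.
dc = {'name': 'us-east-1', 'url': 'dc1.example.com', 'port': 8080,
      'user': 'monitor', 'password': 'secret'}
results = get_data(dc, timeout=60, clear_cache=True)  # fresh fetch, bypassing cached responses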
Example #2
def get_proxies():

    try:
        proxies = dict()
        response = requests.get(current_app.config["PROXY_URL"])
        # Log message is Chinese for "fetched proxy IP: ..."
        current_app.logger.info(u"获取代理IP:  " + response.text)
        if response.ok and (':' in response.text):
            proxies["http"] = "http://" + response.text
        else:
            requests_cache.clear()
    except:
        current_app.logger.error(traceback.format_exc())
        requests_cache.clear()
    finally:
        return proxies
Example #3
    def test_should_fail_if_cache_is_not_turned_off_and_on_properly(self):
        httpretty.register_uri(
            httpretty.GET,
            "http://ben-major.co.uk/labs/top40/api/albums",
            body=request_send_file,
            content_type='text/json',
            status=200
        )

        #: Clear the cache, so that no cached reads are present
        requests_cache.clear()

        #: Turn the cache off
        self.top40.reset_cache(None)

        #: Make a request, but this should not find its way into the cache
        response = requests.get("http://ben-major.co.uk/labs/top40/api/albums")

        expect(response).to(not_(have_property("from_cache")))

        #: Turn cache on
        self.top40.reset_cache(3600)

        #: Prime the cache
        response = requests.get("http://ben-major.co.uk/labs/top40/api/albums")

        #: The first read should not have come from the cache
        expect(response.from_cache).to(be(False))

        #: This time it should be from the cache
        response = requests.get("http://ben-major.co.uk/labs/top40/api/albums")

        expect(response).to(have_property("from_cache"))
        expect(response.from_cache).to(be(True))
Example #4
def get_request(url, params, use_cache=False):

    if not use_cache: requests_cache.clear()
    resp = requests.get(url, params=params)
    print(f"From cache: {resp.from_cache}, {resp.url}")
    return resp.json()  # Return a python object (a list of dictionaries in this case)
Example #5
def get_proxies():

    proxies = dict()  # ensure a value exists for the return in finally, even if the request fails

    try:
        response = requests.get(current_app.config["PROXY_URL"])
        current_app.logger.info(
            str(response.status_code) + "\t" + response.text)
        if response.ok and ":" in response.text:
            proxies = dict(http="http://" + response.text)
        else:
            requests_cache.clear()
    except:
        requests_cache.clear()
        current_app.logger.info(traceback.format_exc())
    finally:
        return proxies
Example #6
def get_articles(url):

    source = ""

    while 1:
        source = retrieve(url)
        try:
            source = re.search("msgList = (.*);", source).group(1)
        except AttributeError:
            # Chinese: "please enter the captcha"
            current_app.logger.critical(u"请输入验证码")
            time.sleep(3)
            requests_cache.clear()
            continue
        break

    for item in json.loads(source)["list"]:
        item = item["app_msg_ext_info"]
        if item["content_url"]:
            item["content"], item["read_num"], item["post_date"] = get_content(
                item)
        yield item
Example #7
    def ls(self, verbose=True, update=False):
        """
        List the available datasets.

        :param verbose: Print the datasets if true
        :param update: Refresh the cache if true
        """

        self.datasets = []

        if update:
            requests_cache.clear()

        response = requests.get(self.base_url)

        if response.ok:
            soup = BeautifulSoup(response.content, "html.parser")
            links = soup.find_all("a")
            for link in links:
                href = link.get("href")
                if href and ".zip" in href:
                    self.datasets.append(href)
                    if verbose:
                        print(href)
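A hedged usage sketch, assuming client is an instance of the class above with base_url pointing at a directory-listing page:

client.ls(verbose=False, update=True)  # clear the requests cache, then re-scrape the listing
zip_links = client.datasets            # hrefs containing ".zip" collected by ls()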
Example #8
def get_pokedex(
        cache: bool = True,
        processes: int = DEFAULT_PROCESSES) -> Dict[Pokemon, List[int]]:
    """
    Returns a dictionary where the keys are all the pokemons and the values are
    the list of related pokemons.
    """
    if not cache:
        print("Removing cache")
        requests_cache.clear()
    else:
        print("Using cache")

    data = get_throttled(POKEDEX_URL)
    soup = BeautifulSoup(data.text, 'html.parser')
    infocards = soup.find_all(class_="infocard")

    # Prune the tree because multiprocessing needs to pickle the data,
    # and passing the original elements cause RecursionError
    infocards_pruned = (prune_bs_tree(infocard) for infocard in infocards)

    with multiprocessing.Pool(processes) as pool:
        parsed = pool.map(parse_infocard, infocards_pruned)

    return dict(parsed)
Example #9
def malshare_update(filename, suffix=""):
    "Update a ClamAV database with MalShare signatures."
    with portalocker.Lock(filename, "ab+") as db_file:
        db_file.seek(0)
        if db_file.readline():
            db_file.seek(-200, os.SEEK_END)
            last_line = db_file.readlines()[-1]
            if not last_line.endswith(b"\n"):
                db_file.write(b"\n")
            old_date = _date_from_db(last_line.decode())

        else:
            old_date = date.today() - timedelta(days=16)

        if old_date in (date.today(), date.today() - timedelta(days=1)):
            return False

        if DEBUG:
            sys.stderr.write(" ".join([
                "Update ",
                str(old_date),
                str(date.today() - timedelta(days=1)), "\n"
            ]))

        db_file.write(
            malshare_by_dates(stop=old_date + timedelta(days=1),
                              suffix=suffix,
                              silent=True).encode())

        if REQUESTS_CACHE:
            requests_cache.clear()

        return True
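A minimal usage sketch for malshare_update above; the database filename is illustrative, and the module-level helpers it relies on (malshare_by_dates, REQUESTS_CACHE, DEBUG) are assumed to be defined as in the originating project:

if malshare_update("malshare.hdb"):
    print("MalShare signatures appended")
else:
    print("Database already up to date")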
Example #10
def removeTempImages():
    dir = os.path.dirname(os.path.abspath(__file__))
    files = os.listdir("{}/temp".format(dir))

    # Clear the rate cache
    requests_cache.clear()
     print ("Rate cache was cleared.")
Example #11
    def __init__(self, acs, tz):
        self.acs = acs
        self.tz = tz
        # Install sqlite cache for celestrak with a 24 hour duration
        # Good enough for celestrak and other data. Cache disabled when appropriate
        requests_cache.install_cache('teeminus10_cache', expire_after=24*60*60)
        requests_cache.clear()
Example #12
def load_annotations(data_folder):
    contents = os.listdir(path=data_folder)
    try:
        infile = os.path.join(data_folder, [i for i in contents if '.tsv' in i][0])
    except IndexError:
        raise Exception(f"No .tsv found in {contents}")
    
    with open(infile, 'r') as litcovid_tsv:
        tsv_reader = csv.reader(litcovid_tsv, delimiter='\t')
        for i in range(32):
            next(tsv_reader)
        pmids = [line[0] for line in tsv_reader]

    doc_id_set = set()
    requests_cache.install_cache('litcovid_cache')
    requests_cache.clear()
    s = requests_cache.CachedSession()
    s.hooks = {'response': throttle}
    logging.debug("requests_cache: %s", requests_cache.get_cache().responses.filename)
    for i, pmid in enumerate(pmids,start=1):
        # NCBI eutils API limits requests to 10/sec
        if i % 100 == 0:
            logging.info("litcovid.parser.load_annotations progress %s", i)

        doc = getPubMedDataFor(pmid, session=s)
        if doc['_id'] not in doc_id_set:
            yield doc
        doc_id_set.add(doc['_id'])

    remove_expired(s)
Example #13
    def _clear_cache(self):
        ''' Clear the globally installed cache. '''
        try:
            requests_cache.clear()
        except AttributeError:
            # requests_cache is not enabled
            print("requests_cache is not enabled. Nothing to clear.")
Example #14
def requests_cache_test():
    requests_cache.install_cache(backend='redis', expire_after=timedelta(days=30))
    requests_cache.clear()
    url = 'http://example.python-scraping.com/view/United-Kingdom-239'
    resp = requests.get(url)
    print(resp.from_cache)
    resp = requests.get(url)
    print(resp.from_cache)
Example #15
    def __init__(self, acs, tz):
        self.acs = acs
        self.tz = tz
        # Install sqlite cache for celestrak with a 24 hour duration
        # Good enough for celestrak and other data. Cache disabled when appropriate
        requests_cache.install_cache('teeminus10_cache',
                                     expire_after=24 * 60 * 60)
        requests_cache.clear()
Example #16
def test_cached_response():
    factory = RequestFactory()
    request = factory.get('/events-with-subscriptions/' + EVENT_ID + '/')
    retrieve_event(request)
    assert_true(retrieve_event.cache)
    requests_cache.clear()
    retrieve_event(request)
    assert_false(retrieve_event.cache)
Example #17
    def load(self):

        response = requests.get(self.REDDIT_SEARCH, params=self.QUERY)
        if response.status_code != 200:
            requests_cache.clear()

        response_json = response.json()
        response_data = response_json.get('data', {})
        return response_data.get('children', [])
Example #18
    def test_get_from_cache(self):
        country_info = CountryInfo()
        requests_cache.install_cache(cache_name='testing-cache',
                                     backend='sqlite',
                                     expire_after=5)
        response1 = country_info.get_info('egypt')
        response2 = country_info.get_info('egypt')
        requests_cache.clear()
        self.assertEqual(response1['from cache'], False)
        self.assertEqual(response2['from cache'], True)
Example #19
def flush(verbose):
    """Flushes the contents of the cache."""
    configure_logging(verbose)
    requests_cache.install_cache('leech')
    requests_cache.clear()

    conn = sqlite3.connect('leech.sqlite')
    conn.execute("VACUUM")
    conn.close()

    logger.info("Flushed cache")
Example #20
def get_url_data(serviceurl, params=None):
    """

    :param serviceurl: url to retrieve data
    :param params: http://docs.python-requests.org/en/master/user/quickstart/#passing-parameters-in-urls
    :return: json url_data
    """

    # Get data from the url
    # Support https without verification of certificate
    # req = requests.get(serviceurl, verify=False, params=params)

    cnt = 0
    max_retry = 3
    purl = parse_url(serviceurl)
    if purl.auth:
        username = purl.auth.split(':')[0]
        password = purl.auth.split(':')[1]
    else:
        username = None
        password = None
    # Add url like http://host
    burl = '{}://{}'.format(purl.scheme, purl.host)
    if purl.port:
        # Add port like: http://host:8080
        burl += ':{}'.format(purl.port)
    if purl.request_uri:
        # Add path and query like: http://host:8080/path/uri?query
        burl += '{}'.format(purl.request_uri)

    while cnt < max_retry:
        try:
            req = requests.get(burl, verify=False, params=params, timeout=timeout, auth=(username, password))
            if req.json():
                return req.json()
            elif req.from_cache:
                # Clear cache to retry again
                requests_cache.clear()
                req = requests.get(burl, verify=False, params=params, timeout=timeout, auth=(username, password))
                if req.json():
                    return req.json()
            else:
                # Raise a custom exception
                raise ValueError('No data from response')

        except requests.exceptions.RequestException as e:
            time.sleep(2 ** cnt)
            cnt += 1
            if cnt >= max_retry:
                raise e

    data = req.json()

    return data
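A hedged usage sketch for get_url_data; the service URL (with embedded credentials, which parse_url extracts) and the query parameters are placeholders, and the module-level timeout referenced inside the function is assumed to be defined:

timeout = 30  # module-level constant assumed by get_url_data
payload = get_url_data("https://monitor:secret@api.example.com/v1/results",
                       params={"limit": 10})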
Example #22
def create_xml():
    requests_cache.clear()
    api = Api(settings.WOO_HOST,
              settings.WOO_CONSUMER_KEY,
              settings.WOO_CONSUMER_SECRET,
              console_logs=False)
    config = json.load(open(settings.XML_CONFIG_FILENAME))
    print("\033[95m[Feed XML] Getting shipping methods...\033[0m")
    utils.default_shippings.clear()
    utils.default_tax_rates.clear()
    shipping_zones = api.get_shipping_zones()
    for zone in shipping_zones:
        zone_locations = api.get_shipping_zone_locations(
            shipping_zone_id=zone.id)
        zone_methods = api.get_shipping_zone_methods(shipping_zone_id=zone.id)
        for location in zone_locations:
            for method in zone_methods:
                utils.default_shippings.append(
                    utils.get_shipping_method(method, location))
    print("\033[95m[Feed XML] Getting tax rates...\033[0m")
    taxes = get_tax_rates(api)
    for config_file_name, config_file in config.items():
        config_file_languages = config_file.get('languages')
        config_file_types = config_file.get('types')

        if config_file_languages and config_file_types:
            for language_file, languages_in_file in config_file_languages.items(
            ):
                languages_in_file = [languages_in_file] if not isinstance(
                    languages_in_file, list) else languages_in_file

                elements = []
                for language in languages_in_file:
                    print((
                        "\033[95m[Feed XML] Getting products in language '%s'...\033[0m"
                    ) % language)
                    if 'variation' in config_file_types:
                        elements.extend(
                            get_products_and_variations(api, language))
                    else:
                        elements.extend(get_products(api, language))

                config_file_path = config_file_name.split('/')
                config_file_directory = ('/').join(['feeds'] +
                                                   config_file_path[:-1])
                print((
                    "\033[95m[Feed XML] Generating '%s/%s_%s_%s.xml'...\033[0m"
                ) % (config_file_directory, settings.XML_FEED_FILENAME,
                     language_file, config_file_path[-1]))
                selected_products = filter(
                    lambda product: product.type in config_file_types.keys(),
                    elements)
                write_xml(selected_products, language_file, config_file_path,
                          config_file_types)
Example #23
def generate_csl_items(args, citekeys_df):
    """
    Generate CSL (citeproc) items for standard_citekeys in citekeys_df.
    Writes references.json to disk and logs warnings for potential problems.
    """
    # Read manual references (overrides) in JSON CSL
    manual_refs = load_manual_references(args.manual_references_paths)

    requests_cache.install_cache(args.requests_cache_path,
                                 include_get_headers=True)
    cache = requests_cache.get_cache()
    if args.clear_requests_cache:
        logging.info('Clearing requests-cache')
        requests_cache.clear()
    logging.info(
        f'requests-cache starting with {len(cache.responses)} cached responses'
    )

    csl_items = list()
    failures = list()
    for standard_citekey in citekeys_df.standard_citekey.unique():
        if standard_citekey in manual_refs:
            csl_items.append(manual_refs[standard_citekey])
            continue
        elif standard_citekey.startswith('raw:'):
            logging.error(
                f'CSL JSON Data with a standard_citekey of {standard_citekey!r} not found in manual-references.json. '
                'Metadata must be provided for raw citekeys.')
            failures.append(standard_citekey)
        try:
            csl_item = citekey_to_csl_item(standard_citekey)
            csl_items.append(csl_item)
        except Exception:
            logging.exception(
                f'Citeproc retrieval failure for {standard_citekey!r}')
            failures.append(standard_citekey)

    logging.info(
        f'requests-cache finished with {len(cache.responses)} cached responses'
    )
    requests_cache.uninstall_cache()

    if failures:
        message = 'CSL JSON Data retrieval failed for the following standardized citation keys:\n{}'.format(
            '\n'.join(failures))
        logging.error(message)

    # Write JSON CSL bibliography for Pandoc.
    with args.references_path.open('w', encoding='utf-8') as write_file:
        json.dump(csl_items, write_file, indent=2, ensure_ascii=False)
        write_file.write('\n')
    return csl_items
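A hedged usage sketch for generate_csl_items above, assuming args is a namespace carrying the attributes the function reads and citekeys_df is a DataFrame with a standard_citekey column; all paths and values are illustrative:

import argparse
import pathlib

args = argparse.Namespace(
    manual_references_paths=[pathlib.Path('content/manual-references.json')],
    requests_cache_path='ci/cache/requests-cache',
    clear_requests_cache=False,
    references_path=pathlib.Path('output/references.json'),
)
csl_items = generate_csl_items(args, citekeys_df)  # citekeys_df: DataFrame with a standard_citekey column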
Example #24
    def _config_cache(self, cache_settings: dict):
        ''' Configure the cache settings '''

        folder_path = os.path.expanduser(os.path.expandvars(cache_settings['path']))
        file_name = os.path.join(folder_path, cache_settings['filename'])

        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        requests_cache.install_cache(file_name, expire_after=cache_settings['max_age_in_seconds'])

        if cache_settings['refresh_cache'] and os.path.isfile("{}.sqlite".format(file_name)):
            requests_cache.clear()
Example #25
def populate_top_pages():
    """
    This goes through the domains stored locally and gets the
    latest chartbeat data every hour
    """
    requests_cache.clear()
    for host_obj in Host.query.all():
        api_key = host_obj.api_key.api_key
        host_name = host_obj.host_name
        res = chartbeat_request(api_key, host_name)
        host_obj.top_pages = res.json()
        host_obj.updated_at = datetime.datetime.utcnow()
    db.session.commit()
Example #27
    def sync(self):
        """Find workflow attributes, create a new template pipeline on TEMPLATE"""

        # Clear requests_cache so that we don't get stale API responses
        requests_cache.clear()

        log.info("Pipeline directory: {}".format(self.pipeline_dir))
        if self.from_branch:
            log.info("Using branch '{}' to fetch workflow variables".format(
                self.from_branch))
        if self.make_pr:
            log.info("Will attempt to automatically create a pull request")

        self.inspect_sync_dir()
        self.get_wf_config()
        self.checkout_template_branch()
        self.delete_template_branch_files()
        self.make_template_pipeline()
        self.commit_template_changes()

        # Push and make a pull request if we've been asked to
        if self.made_changes and self.make_pr:
            try:
                # Check that we have an API auth token
                if os.environ.get("GITHUB_AUTH_TOKEN", "") == "":
                    raise PullRequestException("GITHUB_AUTH_TOKEN not set!")

                # Check that we know the github username and repo name
                if self.gh_username is None and self.gh_repo is None:
                    raise PullRequestException(
                        "Could not find GitHub username and repo name")

                self.push_template_branch()
                self.create_merge_base_branch()
                self.push_merge_branch()
                self.make_pull_request()
                self.close_open_template_merge_prs()
            except PullRequestException as e:
                self.reset_target_dir()
                raise PullRequestException(e)

        self.reset_target_dir()

        if not self.made_changes:
            log.info("No changes made to TEMPLATE - sync complete")
        elif not self.make_pr:
            log.info(
                "Now try to merge the updates in to your pipeline:\n  cd {}\n  git merge TEMPLATE"
                .format(self.pipeline_dir))
Example #28
def retrieve(url, headers=None):

    current_app.logger.info(inspect.stack()[1][3] + " retrieve " + url)

    if not headers:
        headers = dict()

    while 1:
        headers["user-agent"] = UserAgent().random
        try:
            response = requests.get(url,
                                    headers=headers,
                                    proxies=get_proxies(),
                                    timeout=3)
            current_app.logger.info(
                str(response.status_code) + "\t" + response.reason)
            if response.ok and (url == response.request.url):
                if "referer" in headers:
                    return response
                return response.text
            else:
                current_app.logger.info(response.request.url)
                requests_cache.clear()
                time.sleep(3)
                continue
        except:
            current_app.logger.info(traceback.format_exc())
            requests_cache.clear()
            time.sleep(3)
            continue
Example #29
    def enable_cache(self, expire_after=300, drop_cache=False):
        if expire_after is None:
            log.info(
                f'Setting up response cache to never expire (infinite caching)'
            )
        else:
            log.info(
                f'Setting up response cache to expire after {expire_after} seconds'
            )
        requests_cache.install_cache(expire_after=expire_after)
        if drop_cache:
            log.info(f'Dropping cache')
            requests_cache.clear()

        return self
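A minimal usage sketch, assuming client is an instance of the class that defines enable_cache; the values are illustrative:

client.enable_cache(expire_after=600, drop_cache=True)  # cache responses for 10 minutes, starting empty
client.enable_cache(expire_after=None)                  # cache responses indefinitely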
Example #30
def generate_csl_items(args, citation_df):
    """
    Generate CSL (citeproc) items for standard_citations in citation_df.
    Writes references.json to disk and logs warnings for potential problems.
    """
    # Read manual references (overrides) in JSON CSL
    manual_refs = read_manual_references(args.manual_references_path)

    requests_cache.install_cache(args.requests_cache_path,
                                 include_get_headers=True)
    cache = requests_cache.get_cache()
    if args.clear_requests_cache:
        logging.info('Clearing requests-cache')
        requests_cache.clear()
    logging.info(
        f'requests-cache starting with {len(cache.responses)} cached responses'
    )

    csl_items = list()
    failures = list()
    for citation in citation_df.standard_citation.unique():
        if citation in manual_refs:
            csl_items.append(manual_refs[citation])
            continue
        try:
            citeproc = citation_to_citeproc(citation)
            csl_items.append(citeproc)
        except Exception as error:
            logging.exception(f'Citeproc retrieval failure for {citation}')
            failures.append(citation)

    logging.info(
        f'requests-cache finished with {len(cache.responses)} cached responses'
    )
    requests_cache.uninstall_cache()

    if failures:
        message = 'Citeproc retrieval failed for:\n{}'.format(
            '\n'.join(failures))
        logging.error(message)

    # Write JSON CSL bibliography for Pandoc.
    with args.references_path.open('w') as write_file:
        json.dump(csl_items, write_file, indent=2, ensure_ascii=False)
        write_file.write('\n')
    return csl_items
Example #31
    def _get_fahrplans(self):
        if self.update_cache:
            requests_cache.clear()
        self.fahrplans = []
        for url in self.urls:
            try:
                self.fahrplans.append(requests.get(url).json()["schedule"])
            except JSONDecodeError as e:
                print(
                    f"{Colour.FAIL}Problem downloading the Fahrplan {url}. Check your internet connection.{Colour.ENDC}"  # noqa: E501
                )
                print(e)
        if not self.fahrplans:
            print(
                f"{Colour.FAIL}Fahrplan empty. Something is wrong with your urls. Exiting.{Colour.ENDC}"  # noqa: E501
            )
            sys.exit()
Example #32
    def test_caching_works(self):
        import time
        start_time = time.time()

        requests_cache.clear()
        first_time_taken = None

        for i in range(0, 5):
            EDSMInterface.get_bodies("Deciat")
            time_taken = time.time() - start_time
            if not first_time_taken:
                first_time_taken = time_taken
                assert time_taken > 0.1  # Typically takes > 0.5s
            else:
                assert time_taken < 0.1  # Typically ~ 0.002 with the cache installed

            start_time = time.time()
Example #33
def get_rhymes(words, use_cache=False):
    baseurl = "https://api.datamuse.com/words"
    params_diction = {}  # Set up an empty dictionary for query parameters
    params_diction["max"] = "3"  # get at most n results

    new_lst, rhy_lst = [], []
    new_lst = words.strip('.').split()

    #print(resp.from_cache)
    for word in new_lst:
        if not use_cache: requests_cache.clear()
        params_diction["rel_rhy"] = word
        resp = requests.get(baseurl, params=params_diction)

        word_ds = resp.json()  # Return a python object (a list of dictionaries in this case)
        rhy_lst.append([d['word'] for d in word_ds])
    return rhy_lst
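A minimal usage sketch for get_rhymes above; the sentence is illustrative:

print(get_rhymes("The cat sat on the mat.", use_cache=True))  # list of up to 3 rhymes per word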
Example #34
def clear_cache(deep=False):
    """Removes from disk cached data. Just in case you feel the need of
    a fresh start or you have too much bytes laying around.
    Use it with parsimony. In case of a major update you may want to call
    this function. It may solve conflicts rising on unexpected data
    structures.
    The cache needs to be enabled first, so that the cache path is known.

    Args:
        deep (=False, optional): If true, going for removal of http
                                 cache as well.
    """
    file_names = os.listdir(CACHE_PATH)
    for file_name in file_names:
        if file_name.endswith('.pkl'):
            os.remove(os.path.join(CACHE_PATH, file_name))
    if deep:
        requests_cache.clear()
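A minimal usage sketch for clear_cache above; deep=True also empties the installed requests-cache backend, as the docstring describes:

clear_cache()           # remove only the pickled files under CACHE_PATH
clear_cache(deep=True)  # additionally clear the HTTP cache via requests_cache.clear()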
Example #35
    def send_single_request(self, path):
        query_link = self.generate_request(const.LINK + path)
        request = requests.Request(method='GET', url=query_link)
        i = 0
        while True:
            try:
                thottled_request = self.bt.submit(request)
                data = thottled_request.response.content
                if 'To many requests' in data or 'minimum delay' in data:
                    raise requests.exceptions.ConnectionError
                return data
            except requests.exceptions.ConnectionError:
                requests_cache.clear()
                if i >= 10:
                    raise
                print 'Retrying Request'
                i += 1
                time.sleep(60)
Example #36
        def do_tests_for(backend):
            requests_cache.configure(CACHE_NAME, backend)
            requests_cache.clear()
            n_threads = 10
            url = 'http://httpbin.org/get'
            def do_requests(url, params):
                for i in range(10):  # for testing write and read from cache
                    requests.get(url, params=params)

            for _ in range(20): # stress test
                threads = [Thread(target=do_requests, args=(url, {'param': i})) for i in range(n_threads)]
                for t in threads:
                    t.start()
                for t in threads:
                    t.join()

                for i in range(n_threads):
                    self.assert_(requests_cache.has_url('%s?param=%s' % (url, i)))
Example #37
def get_new_alerts():
    """BROADCASTING - Alerts appeared within 1 minute.

    Returns a string.
    """
    try:
        requests_cache.clear()
        ws = get_worldstate()
    except BaseException:
        return ''
    alert_text = ''  # the broadcast text below is assembled in Chinese (location, level, reward, time limit)
    alerts = ws['Alerts']
    for alert in alerts:
        activation = time.time() - \
            float(alert['Activation']['$date']['$numberLong']) / 1000
        if activation < 60 and activation > 0:
            expiry = float(
                alert['Expiry']['$date']['$numberLong']) / 1000 - time.time()
            req_archwing = 'Archwing' if 'archwingRequired' in alert[
                'MissionInfo'] else ''
            rew_credits = alert['MissionInfo']['missionReward']['credits']
            try:
                rew_items = ' - ' + data_dict['L'][
                    alert['MissionInfo']['missionReward']['items']
                    [0].lower()]['value'] if 'items' in alert['MissionInfo'][
                        'missionReward'] else ''
            except BaseException:
                rew_items = ' - ' + \
                    alert['MissionInfo']['missionReward']['items'][0] if 'items' in alert['MissionInfo']['missionReward'] else ''
            rew_counteditems = ' - ' + str(alert['MissionInfo']['missionReward']['countedItems'][0]['ItemCount']) \
                + ' x ' + str(data_dict['L'][alert['MissionInfo']['missionReward']['countedItems'][0]['ItemType'].lower()]['value']) \
                if 'countedItems' in alert['MissionInfo']['missionReward'] else ''

            bc_counteditems = ['泥炭萃取物', '库狛', '库娃', '虚空光体']
            if rew_items == '' or '内融核心' in rew_items:
                if not any(item in rew_counteditems
                           for item in bc_counteditems):
                    break

            alert_text += '新警报任务!\n\n地点:' + data_dict['S'][alert['MissionInfo']['location']]['value'] + ' | ' + req_archwing + data_dict['M'][alert['MissionInfo']['missionType']]['value'] \
                + '\n等级:' + str(alert['MissionInfo']['minEnemyLevel']) + '-' + str(alert['MissionInfo']['maxEnemyLevel']) \
                + '\n奖励:' + str(rew_credits) + ' CR' + rew_items + rew_counteditems\
                + '\n时限:' + s2h(expiry)
    return alert_text
Example #38
def parse():
    query_parameters = request.args
    url = query_parameters.get('url')
    if 'clear_cache' in query_parameters and query_parameters.get(
            'clear_cache') == '1':
        requests_cache.clear()

    try:
        a = Article(url, keep_article_html=True)
        a.download()
        a.parse()
        a.nlp()

        return jsonify({
            "author":
            ", ".join(a.authors),
            "source":
            a.source_url[a.source_url.find("//") + 2:].split("/")[0],
            "title":
            a.title,
            "image":
            a.top_image,
            "url":
            a.url,
            "publishedAt":
            a.publish_date,
            "html":
            a.article_html,
            "text":
            a.text,
            "summary":
            a.summary,
            "keywords":
            a.keywords,
        })
    except Exception as e:
        print(e)
        return jsonify({
            'error':
            True,
            'description':
            "'%s' parsing went wrong with error: '%s'" % (url, str(e))
        })
Example #39
def run():
    liveness = 0
    while True:
        try:
            liveness += 1
            uid = db.get_next_queue()
            print('Get user[%s]\' relationship'%uid)
            get_relation(uid)
            if liveness <= 50:
                delay = random.randint(5, 60)
            elif liveness<=100:
                delay = random.randint(30, 180)
            else:
                delay = random.randint(600, 1800)
                if delay > 900:
                    liveness = 0
            _sleep(delay)
            requests_cache.clear()
        except Exception as e:
            print('Error:%s'%e)
            _sleep(300)
Example #40
    def send_multi_request(self, paths):
        request_queue = []
        for path in paths:
            query_link = self.generate_request(const.LINK + path)
            request_queue.append(requests.Request(method='GET', url=query_link))

        i = 0
        while True:
            try:
                thottled_requests = self.bt.multi_submit(request_queue)
                responses = [tr.response for tr in thottled_requests]
                request_queue = []
                complete_response = []
                for response in responses:
                    if 'To many requests' in response.content or 'minimum delay' in response.content:
                        request_queue.append(response.url)
                        print response.content
                    elif 'you are now in a lockout period' in response.content:
                        raise Exception(response.content)
                    else:
                        complete_response.append(response.content)

                if len(request_queue) > 0:
                    raise requests.exceptions.ConnectionError

                return complete_response
            except AttributeError:
                for request in request_queue:
                    print 'Error with request: ', request
                raise
            except requests.exceptions.ConnectionError:
                requests_cache.clear()
                if i >= 10:
                    raise
                print 'Retrying Request'
                i += 1
                time.sleep(60)
Example #41
    def setUp(self):
        requests_cache.configure(CACHE_NAME, backend=CACHE_BACKEND, fast_save=FAST_SAVE)
        requests_cache.clear()
Example #42
    def setUp(self):
        self.top40 = top40.Top40(cache_duration=3600)

        #: Clear the cache, otherwise if a Python 2 test is run after a Python 3 test, then
        #: incorrect pickle format errors can occur
        requests_cache.clear()
Example #43
            info[line['arch']]['md5'] = line['md5']
            info[line['arch']]['key-offset'] = line['offset']
        for line in data['url-offsets']:
            if line['arch'] not in info:
                raise VersionError('{} not in key-offsets.'.format(line['arch']))
            if line['md5'] != info[line['arch']]['md5']:
                raise VersionError('MD5s for {} do not match.'.format(line['arch']))
            info[line['arch']]['url-offset'] = line['offset']
        return info

    try:
        info = retrieve_version_info()
    except VersionError as e:
        if config['debug']:
            print('Version missing from cache. Clearing and retrieving again ...')
        requests_cache.clear()
        try:
            info = retrieve_version_info()
        except VersionError as e:
            if config['debug']:
                print('ERROR: Version is missing from wiki ({}).'.format(str(e).rstrip('.')), file=sys.stderr)
            else:
                print('ERROR: Version is missing from wiki.', file=sys.stderr)
            sys.exit(1)
    if 'versions' not in config:
        config['versions'] = {}
    config['versions'][args.version] = info
    with open('config.json', 'w') as fp:
        json.dump(config, fp, indent=2)
print(json.dumps(info, indent=2))
Example #44
def cache_clear():
    requests_cache.clear()
    CACHE.urls = {}
    return 'OK'
Example #45
def main():
    '''
    Our main application
    '''

    parser = op("usage ipblisted.py --ip [ip]")
    parser.add_option('--proxy', action="store", dest="proxy", help="Useful for when behind a proxy")
    parser.add_option('--proxy_user', action="store", dest="proxy_user")
    parser.add_option('--proxy_pass', action="store", dest="proxy_pass")
    parser.add_option('--good', default=False, action="store_true", dest="show_good", help="Displays lists that the IP did NOT show up on.")
    parser.add_option('--skip-dnsbl', default=False, action="store_true", dest="skip_dnsbl", help="Skips the checking DNS Blacklists")
    parser.add_option('--skip-bl', default=False, action="store_true", dest="skip_bl", help="Skips the checking of text based blacklists")
    parser.add_option('--no-cache', default=False, action="store_true", dest="no_cache", help="This will prevent caching of text based blacklists")
    parser.add_option('--clear-cache', default=False, action="store_true", dest="clear_cache", help="This will clear the existing cache")
    parser.add_option('--cache-timeout', default=60*60*12, action="store", dest="cache_timeout", help="Number of seconds before cache results are to expire (Default: 12 hours)")
    parser.add_option('--threads', default=5, action="store", dest="threads", help="Sets the number of feed search threads")
    parser.add_option('--infile', default=None, action="store", dest="infile", help="A newline separated list of IP addresses")
    parser.add_option('--ip', action="store", dest="ip")
    parser.add_option('-w','--wan', action="store_true", dest="wan", default=False, help="Will add your WAN ip to the list of IP addresses being checked.")
    parser.add_option('-f', '--format', action="store", dest="format", help="Set the output format for an outfile", default="csv")
    parser.add_option('-o', '--outfile', action="store", dest="outfile", help="Where to write the results", default=None)
    (options, args) = parser.parse_args()

    if options.format:
        allowed_formats = ['csv', 'xls', 'xlsx', 'txt']
        if not options.format in allowed_formats:
            cprint("[!] Invalid format \"{}\".  Please select a valid format {}".format(options.format, ', '.join(allowed_formats)), RED)
            sys.exit(1)

    if options.outfile:
        print("[*] Results will be saved to {} in {} format".format(options.outfile, options.format))

    # Check if the user supplied an IP address or IP block
    if options.ip is None and options.infile is None and options.wan is False:
        print("[!] You must supply an IP address, the WAN flag or a file containing IP addresses.")
        sys.exit(1)

    # Set our list of IPs to an empty list
    ips = []

    # Load up the IP in the --ip flag
    if options.ip:
        if '\\' in options.ip or '/' in options.ip:
            cprint("[!] Detected CIDR notation, adding all IP addresses in this range", BLUE)
            for ip in IPSet([options.ip]):
                ips += [str(ip)]
        elif len(options.ip.split(',')) > 0:
            ips += [ip for ip in options.ip.split(',') if ip != '']  # Handles when user does ,%20 
        else:
            ips += [options.ip]

    # If the user supplied a file load these as well
    if options.infile:
        ips += [ip for ip in file(options.infile).read().split('\n') if ip != '']

    if options.wan:
        ip = wan_ip()
        if ip:
            ips += [ip]
        else:
            cprint("[!] There was an issue trying to gather the WAN IP address.", RED)

    # Check if the user set their credentials when using a proxy
    if options.proxy:
        if options.proxy_user is None or options.proxy_pass is None:
            cprint("[!] Warning, no proxy credentials supplied.  Authenticated proxies may not work.", BLUE)
        else:
            options.proxy_pass = urllib.quote(options.proxy_pass)

    # Initialize a queue for the feeds to go in
    fq = Queue()

    # Load in all the feeds from the feed configuration file
    feeds = load_feeds({"skip_bl": options.skip_bl, "skip_dnsbl": options.skip_dnsbl})

    # Establish the requests cache
    if not options.no_cache:
        requests_cache.install_cache('ipblisted', expire_after=int(options.cache_timeout))

        # If the user wants to manually clear the cache, do it now
        if options.clear_cache:
            requests_cache.clear()

    # If there are no feeds set, just exit the program
    if len(feeds) == 0:
        cprint("[!] No feeds were defined, please define them in feeds.json or don't skip them all.", RED)
        sys.exit(1)

    # Final check to make sure we actually have a list of IP addresses to check
    if len(ips) == 0:
        cprint("[!] No IP addresses were listed to check.  Please check your syntax and try again.", RED)

    feed_results = []

    # Loop through each IP and find it
    print("[*] Checking {} IP addresses against {} lists".format(len(ips), len(feeds)))
    for ip in ips:

        print("[*] Searching Blacklist feeds for IP {ip}".format(ip=ip))

        # Build the feed requests queue
        oq = Queue()

        # Create a queue of all the feeds we want to check
        [fq.put(f) for f in feeds]
        qsize = fq.qsize()

        # Start up our threads and start checking the feeds
        threads = [FeedThread(ip, options, fq, oq) for i in range(0, options.threads)]
        [t.start() for t in threads]
        [t.join() for t in threads]

        # Set the number of lists we have found to 0
        find_count = 0

        # Go through each feed and see if we find the IP or block
        results = [r for r in oq.queue]

        if options.outfile:
            convert_results(results, ip, options.outfile)

        # Print out if the IP was found in any of the feeds
        for result in results:

            output = "[*] {name}: {found}".format(**result)

            if result["found"] == "Found":
                find_count += 1
                cprint(output,RED)
                continue

            if options.show_good:
                cprint(output)

        if find_count == 0:
            cprint("[*] Not found on any defined lists.", GREEN)
        else:
            cprint("[*] Found on {}/{} lists.".format(find_count,qsize), RED)
        print("[-]")
Example #46
    def tearDown(self):
        requests_cache.clear()
Example #47
    def clear_cache(self):
        if self.cache_name:
            requests_cache.clear()
        self.download()
Example #48
    def clear_cache(self):
        requests_cache.clear()
Example #49
    def request(
        self,
        uri=None,
        method="GET",
        data=None,
        params={},
        decode_json=True,
        headers={},
        stream=False,
    ):
        """Perform a HTTP requests.

        Parameters
        ----------
        uri : str
            The URI to use for the request.
        method : str
            The HTTP method to use for the request.
        data : dict
            Any data to send as part of a post request body.
        params : dict
            Query string parameters.
        decode_json : bool
            Decode response or not.
        headers : dict
            The HTTP request headers.
        stream: bool
            If response is streamed.

        Returns
        -------
        dict
            Decoded JSON response data as a dict object.
        """

        self.last_response = None

        if uri is None:
            uri = self.uri

        headers["User-Agent"] = self.USER_AGENT

        uri = "{}.json".format(uri)

        resp = None
        if "GET" == method:
            attempt = 0
            while attempt <= 5:
                resp = self.session.get(
                    uri, auth=self.auth, headers=headers, params=params, stream=stream
                )

                if resp.status_code not in list(range(500, 505)):
                    # No need to retry for if not a server error type.
                    break

                attempt += 1
                params["acapi_retry"] = attempt
                time.sleep((attempt ** 2.0) / 10)

            # We need to unset the property or it sticks around.
            if "acapi_retry" in params:
                del params["acapi_retry"]

        if "POST" == method:
            jdata = json.dumps(data)
            resp = self.session.post(
                uri, auth=self.auth, headers=headers, params=params, data=jdata
            )
            # This is a sledgehammer but fine grained invalidation is messy.
            if self.is_cache_enabled():
                requests_cache.clear()

        if "DELETE" == method:
            resp = self.session.delete(uri, auth=self.auth, headers=headers, params=params)
            # Quickest and easiest way to do this.
            if self.is_cache_enabled():
                requests_cache.clear()

        if hasattr(resp, "from_cache") and resp.from_cache:
            LOGGER.info("%s %s returned from cache", method, uri)

        self.last_response = resp

        try:
            resp.raise_for_status()
        except requests.exceptions.HTTPError as exp:
            LOGGER.info(
                "Failed request response headers: \n%s",
                pformat(exp.response.headers, indent=2),
            )
            raise

        if stream:
            return resp

        if decode_json:
            return resp.json()

        return resp.content
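A hedged usage sketch, assuming client is an instance of the wrapper above with session, auth and a default uri already configured; the URI and payload are placeholders (the method appends ".json" to the URI itself):

site = client.request(uri='https://cloudapi.example.com/v1/sites/mysite')
client.request(uri='https://cloudapi.example.com/v1/sites/mysite/tasks',
               method='POST', data={'name': 'backup'})  # POSTs clear the cache when caching is enabled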
Example #50
    def setUp(self):
        requests_cache.configure(CACHE_NAME, backend=CACHE_BACKEND)
        requests_cache.clear()
Example #51
    def clear_cache(self):
        from requests_cache import clear
        clear()
Example #52
    def request(self, uri=None, method='GET', data=None, params={}, decode_json=True):
        """Perform a HTTP requests.

        Parameters
        ----------
        uri : str
            The URI to use for the request.
        method : str
            The HTTP method to use for the request.
        auth : tuple
            The authentication credentials to use for the request.
        data : dict
            Any data to send as part of a post request body.
        params : dict
            Query string parameters.

        Returns
        -------
        dict
            Decoded JSON response data as a dict object.
        """
        self.last_response = None

        if uri is None:
            uri = self.uri

        headers = {'User-Agent': self.USER_AGENT}

        uri = '{}.json'.format(uri)

        if 'GET' == method:
            attempt = 0
            while attempt <= 5:
                resp = requests.get(uri, auth=self.auth, headers=headers, params=params)

                if resp.status_code not in range(500, 505):
                    # No need to retry for if not a server error type.
                    break

                attempt += 1
                params['acapi_retry'] = attempt
                time.sleep((attempt ** 2.0) / 10)


            # We need to unset the property or it sticks around.
            if 'acapi_retry' in params:
                del params['acapi_retry']

        if 'POST' == method:
            jdata = json.dumps(data)
            resp = requests.post(uri, auth=self.auth, headers=headers, params=params, data=jdata)
            # This is a sledgehammer but fine grained invalidation is messy.
            requests_cache.clear()

        if 'DELETE' == method:
            resp = requests.delete(uri, auth=self.auth, headers=headers, params=params)
            # Quickest and easiest way to do this.
            requests_cache.clear()

        if hasattr(resp, 'from_cache') and resp.from_cache:
            LOGGER.info("%s %s returned from cache", method, uri)

        self.last_response = resp

        if resp.status_code != requests.codes.ok:
            try:
                resp.raise_for_status()
            except requests.exceptions.HTTPError as exp:
                LOGGER.info("Failed request response headers: \n%s",
                            pformat(exp.response.headers, indent=2))
                raise

        if decode_json:
            return resp.json()

        return resp.content
Example #53
    def clear_cache(self):
        ''' Clear the globally installed cache. '''
        try:
            requests_cache.clear()
        except:
            pass