Ejemplo n.º 1
0
def crawl_data(subject, startDate):
    """Crawl gradcafe survey results for *subject* and pickle them to disk.

    Pages are requested one at a time, starting from page 1, until a page
    contains a record older than *startDate* or a page comes back with no
    records at all. Records dated on or after *startDate* are collected and
    written to ``all_response.pickle`` in the current working directory.

    Args:
        subject: Search term sent as the ``q`` query field.
        startDate: Earliest date to keep; converted with ``np.datetime64``.

    Raises:
        RuntimeError: If any page request returns a non-200 HTTP status.
    """
    print('crawling data from gradcafe...')
    # NOTE(review): urllib3 URL-encodes `fields` for GET requests, so quoting
    # here presumably double-encodes subjects containing special characters
    # (e.g. spaces) -- confirm against the server before changing.
    _subject = quote(subject)
    _startDate = np.datetime64(startDate)

    manager = urllib3.PoolManager()

    # start crawling data
    page = 1
    allResponse = []

    while True:
        print(f'fetching page {page}')

        # send a GET request
        r = manager.request('GET',
                            'https://www.thegradcafe.com/survey/index.php',
                            fields={
                                'q': _subject,
                                't': 'a',
                                'o': '',
                                'p': repr(page)
                            })
        if r.status != 200:
            raise RuntimeError(
                f'unable to fetch page {page} (HTTP response {r.status})')

        response = parse_response(r.data)

        # A page without records has no "oldest" entry to compare against
        # (indexing [-1] would raise IndexError), so stop crawling here.
        if not response:
            break

        # sort the results by date, newest first, so the last one is oldest
        response.sort(key=lambda x: x['date'], reverse=True)

        if response[-1]['date'] < _startDate:
            # This page crosses the cut-off: keep only the wanted records
            # and stop.
            allResponse.extend(
                x for x in response if x['date'] >= _startDate)
            break

        allResponse.extend(response)

        # add a bit of interval between requests
        time.sleep(0.05)
        page += 1

    print('fetched {} records from gradcafe'.format(len(allResponse)))

    # save all response to file
    with open('all_response.pickle', 'wb') as outfile:
        pickle.dump(allResponse, outfile)
Ejemplo n.º 2
0
 def _process_select_response(self, resp):
     out = {}
     for key, value in resp.iteritems():
         key = key.upper()
         if key == "OK":
             continue
         elif key in ("EXISTS", "RECENT", "UIDNEXT", "UIDVALIDITY"):
             value = int(value[0])
         elif key in ("FLAGS", "PERMANENTFLAGS"):
             value = parse_response(value)[0]
         elif key == "READ-WRITE":
             value = True
         out[key] = value
     return out
Ejemplo n.º 3
0
    def _proc_folder_list(self, folder_data):
        """Turn raw folder-list data into ``(flags, delim, name)`` tuples.

        Empty strings and ``None`` entries are discarded first; this also
        covers the "no folders" case, which arrives as ``[None]``. The
        remaining data is parsed and consumed three items at a time.
        """
        cleaned = [entry for entry in folder_data if entry not in ("", None)]
        tokens = parse_response(cleaned)

        folders = []
        # Each folder is described by three consecutive parsed items.
        for start in range(0, len(tokens), 3):
            raw_flags, delim, raw_name = tokens[start:start + 3]
            decoded_flags = [imap_utf7.decode(flag) for flag in raw_flags]
            folder_name = self._decode_folder_name(raw_name)
            folders.append((decoded_flags, delim, folder_name))
        return folders
Ejemplo n.º 4
0
    def namespace(self):
        """Fetch the account's namespaces as a (personal, other, shared)
        tuple.

        Any of the three elements may be None when the account has no
        namespace of that kind; otherwise it is a sequence of
        (prefix, separator) pairs.

        The elements can be read by position or through the attributes
        *personal*, *other* and *shared*.

        See `RFC 2342 <http://tools.ietf.org/html/rfc2342>`_ for more details.
        """
        response = self._command_and_check("namespace")
        parsed = parse_response(response)
        return Namespace(*parsed)
Ejemplo n.º 5
0
 def _process_select_response(self, resp):
     out = {}
     for key, value in resp.iteritems():
         key = key.upper()
         if key == 'OK':
             continue
         elif key in ('EXISTS', 'RECENT', 'UIDNEXT', 'UIDVALIDITY'):
             value = int(value[0])
         elif key in ('FLAGS', 'PERMANENTFLAGS'):
             value = parse_response(value)[0]
         elif key == 'READ-WRITE':
             value = True
                              
         out[key] = value
     return out
Ejemplo n.º 6
0
    def namespace(self):
        """Fetch the account's namespaces as a (personal, other, shared) tuple.

        Any of the three elements may be None when the account has no
        namespace of that kind; otherwise it is a sequence of
        (prefix, separator) pairs.

        The elements can be read by position or through the attributes
        "personal", "other" and "shared".

        See RFC 2342 for more details.
        """
        status, payload = self._imap.namespace()
        # Let the shared checker deal with a non-OK status.
        self._checkok('namespace', status, payload)
        parsed = parse_response(payload)
        return Namespace(*parsed)
Ejemplo n.º 7
0
    def folder_status(self, folder, what=None):
        """Query the status items of *folder*.

        *what* is a sequence of status item names; a single string is
        treated as a one-element sequence. When omitted it defaults to
        ``('MESSAGES', 'RECENT', 'UIDNEXT', 'UIDVALIDITY', 'UNSEEN')``.

        Returns a dictionary built from the status items reported for the
        folder, with keys matching *what*.
        """
        if what is None:
            items = ("MESSAGES", "RECENT", "UIDNEXT", "UIDVALIDITY", "UNSEEN")
        elif isinstance(what, basestring):
            items = (what,)
        else:
            items = what

        # The item names are sent as one parenthesised, space-separated list.
        query = "(%s)" % " ".join(items)
        encoded_folder = self._encode_folder_name(folder)

        data = self._command_and_check("status", encoded_folder, query, unpack=True)
        # The parsed reply has two parts; only the second one is used here.
        _first, status_items = parse_response([data])
        return dict(as_pairs(status_items))
Ejemplo n.º 8
0
"""
The Smart Contract caller can pass in data to be processed: URLs to fetch, plus any required headers and parameters.

Additional named parameters for post-processing could also be specified to make this code more generic.
These could be a set of regular expressions or parse rules used to post-process the data.
Take care that none of the params passed cause problems for the requests library or for the URL being called.
"""

# Required arguments come from the environment: ARG0 and ARG1 are handed to
# requests.request as its first two positional arguments (method and URL).
http_method = os.environ['ARG0']
target_url = os.environ['ARG1']
arg = [http_method, target_url]

# Optional third argument: a Python literal that is evaluated into the
# keyword arguments; defaults to no keyword arguments when ARG2 is unset.
kwargs = ast.literal_eval(os.environ['ARG2']) if 'ARG2' in os.environ else {}

# Perform the HTTP request.
req = requests.request(http_method, target_url, **kwargs)

# Hand the response, together with the same keyword arguments, to the parser.
# (Module-qualified alternative: response_parser.parse_response(req, **kwargs))
parsed_response_text = parse_response(req, **kwargs)

# Emit the parsed text on a single line by removing every newline.
# (Unparsed alternative: print(req.text.replace('\n','')))
single_line_text = parsed_response_text.replace('\n', '')
print(single_line_text)

# Alternative if the response is always JSON:
# print(json.loads(req.text))
Ejemplo n.º 9
0
def _parse_untagged_response(text):
    assert text.startswith("* ")
    text = text[2:]
    if text.startswith(("OK ", "NO ")):
        return tuple(text.split(" ", 1))
    return parse_response([text])
Ejemplo n.º 10
0
def search_items(keywords, search_index="All", item_page=1):
    """Run a PA-API 5.0 SearchItems request and return the parsed result.

    Args:
        keywords: Search keywords forwarded to ``SearchItemsRequest``.
        search_index: Category in which the search is made. For details see
            https://webservices.amazon.com/paapi5/documentation/use-cases/organization-of-items-on-amazon/search-index.html
        item_page: Result page forwarded to ``SearchItemsRequest``.

    Returns:
        The value of ``parse_response(response)`` when the API call
        succeeds (errors attached to the response are printed first).
        ``None`` when the request cannot be formed or the call raises;
        in that case the problem is printed rather than propagated.
    """
    default_api = DefaultApi(
        access_key=access_key, secret_key=secret_key, host=host, region=region
    )

    # Item count to be returned in the search result.
    item_count = 20

    # Resources requested from the SearchItemsResource enum. For details see
    # https://webservices.amazon.com/paapi5/documentation/search-items.html#resources-parameter
    search_items_resource = [
        SearchItemsResource.ITEMINFO_TITLE,
        SearchItemsResource.OFFERS_LISTINGS_PRICE,
        SearchItemsResource.IMAGES_PRIMARY_LARGE,
        SearchItemsResource.OFFERS_LISTINGS_SAVINGBASIS,
        SearchItemsResource.ITEMINFO_FEATURES,
        SearchItemsResource.OFFERS_LISTINGS_PROMOTIONS,
        SearchItemsResource.OFFERS_LISTINGS_CONDITION,
        SearchItemsResource.OFFERS_LISTINGS_ISBUYBOXWINNER
    ]

    # Forming request
    try:
        search_items_request = SearchItemsRequest(
            partner_tag=partner_tag,
            partner_type=PartnerType.ASSOCIATES,
            keywords=keywords,
            search_index=search_index,
            item_count=item_count,
            resources=search_items_resource,
            item_page=item_page
        )
    except ValueError as exception:
        print("Error in forming SearchItemsRequest: ", exception)
        return

    try:
        # Sending request
        response = default_api.search_items(search_items_request)
        print("Request received")
        res = parse_response(response)

        if response.errors is not None:
            print("\nPrinting Errors:\nPrinting First Error Object from list of Errors")
            print("Error code", response.errors[0].code)
            print("Error message", response.errors[0].message)
        return res

    except ApiException as exception:
        print("Error calling PA-API 5.0!")
        print("Status code:", exception.status)
        print("Errors :", exception.body)
        # The exception may carry no headers at all, or lack the request-id
        # header; look it up defensively so this handler cannot itself raise.
        headers = exception.headers or {}
        print("Request ID:", headers.get("x-amzn-RequestId"))

    except TypeError as exception:
        print("TypeError :", exception)

    except ValueError as exception:
        print("ValueError :", exception)

    except Exception as exception:
        # Last-resort boundary: report and fall through to return None.
        print("Exception :", exception)