def crawl_data(subject, startDate):
    """Crawl admission survey records for *subject* from thegradcafe.com.

    Fetches successive result pages until a page whose oldest record
    predates *startDate* is reached (or a page comes back empty), then
    pickles the collected records to ``all_response.pickle``.

    Parameters
    ----------
    subject : str
        Search keyword; URL-quoted before being sent.
    startDate : str or numpy.datetime64
        Oldest record date (inclusive) to keep.

    Raises
    ------
    RuntimeError
        If any page request returns a non-200 HTTP status.
    """
    print('crawling data from gradcafe...')
    _subject = quote(subject)
    _startDate = np.datetime64(startDate)
    manager = urllib3.PoolManager()

    page = 1
    allResponse = []
    while True:
        print(f'fetching page {page}')
        r = manager.request(
            'GET', 'https://www.thegradcafe.com/survey/index.php',
            fields={'q': _subject, 't': 'a', 'o': '', 'p': str(page)})
        if r.status != 200:
            raise RuntimeError(
                f'unable to fetch page {page} (HTTP response {r.status})')

        response = parse_response(r.data)
        # Guard against an empty page: the original code crashed here with
        # an IndexError on response[-1]; an empty page means we are done.
        if not response:
            break

        # Newest first, so the last element is the oldest record on the page.
        response.sort(key=lambda x: x['date'], reverse=True)
        if response[-1]['date'] < _startDate:
            # This page straddles the cutoff: keep only records on/after
            # the start date, then stop crawling.
            allResponse.extend(
                rec for rec in response if rec['date'] >= _startDate)
            break

        allResponse.extend(response)
        # Be polite: small delay between successive requests.
        time.sleep(0.05)
        page += 1

    print('fetched {} records from gradcafe'.format(len(allResponse)))
    # Persist everything we collected for later processing.
    with open('all_response.pickle', 'wb') as outfile:
        pickle.dump(allResponse, outfile)
def _process_select_response(self, resp): out = {} for key, value in resp.iteritems(): key = key.upper() if key == "OK": continue elif key in ("EXISTS", "RECENT", "UIDNEXT", "UIDVALIDITY"): value = int(value[0]) elif key in ("FLAGS", "PERMANENTFLAGS"): value = parse_response(value)[0] elif key == "READ-WRITE": value = True out[key] = value return out
def _proc_folder_list(self, folder_data):
    """Parse a LIST/LSUB response into (flags, delimiter, name) tuples."""
    # Drop empty strings and Nones first.  A server with no folders
    # produces no untagged responses, which arrives here as [None].
    items = [chunk for chunk in folder_data if chunk not in ("", None)]

    parsed = parse_response(items)
    folders = []
    # Responses come in flat triples: flags, delimiter, folder name.
    while parsed:
        raw_flags, delim, raw_name = parsed[0], parsed[1], parsed[2]
        parsed = parsed[3:]
        decoded_flags = [imap_utf7.decode(f) for f in raw_flags]
        folders.append(
            (decoded_flags, delim, self._decode_folder_name(raw_name)))
    return folders
def namespace(self):
    """Return the account's namespaces as a (personal, other, shared) tuple.

    Each element is either None, when no namespace of that type exists,
    or a sequence of (prefix, separator) pairs.  The tuple elements may
    be accessed positionally or via the attributes *personal*, *other*
    and *shared*.

    See `RFC 2342 <http://tools.ietf.org/html/rfc2342>`_ for more details.
    """
    raw = self._command_and_check("namespace")
    parsed = parse_response(raw)
    return Namespace(*parsed)
def _process_select_response(self, resp): out = {} for key, value in resp.iteritems(): key = key.upper() if key == 'OK': continue elif key in ('EXISTS', 'RECENT', 'UIDNEXT', 'UIDVALIDITY'): value = int(value[0]) elif key in ('FLAGS', 'PERMANENTFLAGS'): value = parse_response(value)[0] elif key == 'READ-WRITE': value = True out[key] = value return out
def namespace(self):
    """Return the (personal, other, shared) namespaces for the account.

    Each element is None if that namespace type is absent, otherwise a
    sequence of (prefix, separator) pairs.  Elements can be read either
    positionally or through the attributes "personal", "other" and
    "shared".  See RFC 2342 for more details.
    """
    typ, data = self._imap.namespace()
    self._checkok('namespace', typ, data)
    parsed = parse_response(data)
    return Namespace(*parsed)
def folder_status(self, folder, what=None):
    """Return the status of *folder* as a dict.

    *what* is a sequence of status items to query, defaulting to
    ``('MESSAGES', 'RECENT', 'UIDNEXT', 'UIDVALIDITY', 'UNSEEN')``.
    A single string is also accepted.  The returned dict's keys match
    the items in *what*.
    """
    if what is None:
        what = ("MESSAGES", "RECENT", "UIDNEXT", "UIDVALIDITY", "UNSEEN")
    elif isinstance(what, basestring):
        # Allow a bare string as a convenience for a single item.
        what = (what,)

    query = "(%s)" % (" ".join(what))
    data = self._command_and_check(
        "status", self._encode_folder_name(folder), query, unpack=True)
    _, status_items = parse_response([data])
    return dict(as_pairs(status_items))
"""
The Smart Contract caller can pass in data to be processed: URLs to
fetch along with any required headers and parameters.  Additional named
parameters for post-processing (e.g. regular expressions or parse
rules) could also be supplied to keep this code generic.  Care must be
taken that none of the passed params cause problems for the requests
library or for the URL being called.
"""
# HTTP method and target URL come from the environment.
method = os.environ['ARG0']
url = os.environ['ARG1']

# Optional third argument: a literal dict of extra keyword arguments.
if 'ARG2' in os.environ:
    kwargs = ast.literal_eval(os.environ['ARG2'])
else:
    kwargs = {}

# Perform the request, forwarding any extra keyword arguments.
req = requests.request(method, url, **kwargs)

# Run the post-processor over the response (kwargs may also carry
# parser options) and emit the result on a single line.
parsed_response_text = parse_response(req, **kwargs)
print(parsed_response_text.replace('\n',''))
def _parse_untagged_response(text): assert text.startswith("* ") text = text[2:] if text.startswith(("OK ", "NO ")): return tuple(text.split(" ", 1)) return parse_response([text])
def search_items(keywords, search_index="All", item_page=1):
    """Search Amazon via PA-API 5.0 and return the parsed response.

    *keywords* is the search phrase, *search_index* the category to
    search in (see the PA-API search-index documentation), and
    *item_page* the page of results to fetch.  Returns the value of
    ``parse_response`` on the API response, or None on failure.
    """
    api = DefaultApi(
        access_key=access_key, secret_key=secret_key, host=host, region=region
    )

    # Number of results to return per page.
    item_count = 20

    # Resources requested from the SearchItemsResource enum; see
    # https://webservices.amazon.com/paapi5/documentation/search-items.html#resources-parameter
    resources = [
        SearchItemsResource.ITEMINFO_TITLE,
        SearchItemsResource.OFFERS_LISTINGS_PRICE,
        SearchItemsResource.IMAGES_PRIMARY_LARGE,
        SearchItemsResource.OFFERS_LISTINGS_SAVINGBASIS,
        SearchItemsResource.ITEMINFO_FEATURES,
        SearchItemsResource.OFFERS_LISTINGS_PROMOTIONS,
        SearchItemsResource.OFFERS_LISTINGS_CONDITION,
        SearchItemsResource.OFFERS_LISTINGS_ISBUYBOXWINNER,
    ]

    # Build the request object; invalid argument values raise ValueError.
    try:
        request = SearchItemsRequest(
            partner_tag=partner_tag,
            partner_type=PartnerType.ASSOCIATES,
            keywords=keywords,
            search_index=search_index,
            item_count=item_count,
            resources=resources,
            item_page=item_page,
        )
    except ValueError as exception:
        print("Error in forming SearchItemsRequest: ", exception)
        return

    # Send the request and post-process the response.
    try:
        response = api.search_items(request)
        print("Request received")
        res = parse_response(response)
        if response.errors is not None:
            # Surface the first error object, but still return the
            # parsed result (partial responses are possible).
            print("\nPrinting Errors:\nPrinting First Error Object from list of Errors")
            print("Error code", response.errors[0].code)
            print("Error message", response.errors[0].message)
        return res
    except ApiException as exception:
        print("Error calling PA-API 5.0!")
        print("Status code:", exception.status)
        print("Errors :", exception.body)
        print("Request ID:", exception.headers["x-amzn-RequestId"])
    except TypeError as exception:
        print("TypeError :", exception)
    except ValueError as exception:
        print("ValueError :", exception)
    except Exception as exception:
        print("Exception :", exception)