Example #1
def find_request(tree):
    requests = []
    request = tree.xpath(
        u"//dd[@class='job_request']/p/span[position()>1]/text()")

    requests.append(request)
    return requests
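A quick way to exercise find_request, assuming the tree comes from lxml.html (the snippet and the expected output are illustrative):

from lxml import html

snippet = ("<dl><dd class='job_request'>"
           "<p><span>Experience:</span><span>3 years</span></p>"
           "</dd></dl>")
tree = html.fromstring(snippet)
print(find_request(tree))  # [['3 years']] - the xpath result list is itself appended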
Example #2
 def parse_links(self, response):
     x = HtmlXPathSelector(response)
     urls = x.select('//td[@class="contenttext"]/a/@href').extract()
     requests = []
     for url in urls:
         requests.append(Request(url="http://www.senate.gov" + url))
     return requests
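HtmlXPathSelector comes from pre-1.0 Scrapy; on current Scrapy the same callback would use the response's built-in selector. A sketch:

import scrapy

def parse_links(self, response):
    urls = response.xpath('//td[@class="contenttext"]/a/@href').extract()
    return [scrapy.Request(url="http://www.senate.gov" + url) for url in urls]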
Example #3
    def write_request(self, request_id, chatmessages):
        requests = []
        columns = ['RequestID', 'Index', 'Source', 'AudioPath', 'Transcript']
        for i, msg in enumerate(chatmessages):
            audio = os.path.basename(msg.audio_path)
            request = {
                'RequestID': request_id,
                'Index': i,
                'Source': msg.source,
                'AudioPath': audio,
                'Transcript': msg.utterance
            }
            requests.append(request)
        if self.mongoclient is not None and self.mongoclient.client is not None:
            try:
                mongocollection = self.mongoclient.client['chatbot'][
                    'requests']
                result = mongocollection.insert_many(requests)
                logger.info("Added requests to mongodb")
            except Exception as ex:
                self.mongoclient.client = None
                logger.error(traceback.format_exc())
                logger.warning("Deactivating mongodb")

        df = pd.DataFrame(requests)
        if not os.path.isfile(self.requests_fname):
            with open(self.requests_fname, 'w') as f:
                f.write(','.join(columns))
                f.write('\n')
        df.to_csv(self.requests_fname,
                  mode='a',
                  index=False,
                  header=False,
                  columns=columns)
        logger.info("Write request to {}".format(self.requests_fname))
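The write-header-once branch above can be collapsed into to_csv's own header flag; a sketch of the equivalent tail:

        write_header = not os.path.isfile(self.requests_fname)
        df.to_csv(self.requests_fname,
                  mode='a',
                  index=False,
                  header=write_header,
                  columns=columns)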
Example #5
    def add_dnc(self, phone_numbers, dnc_list_id):
        """
        Adds phone numbers to a DNC list of choice
        Args:
            phone_numbers (``list``): Phone numbers to add to DNC
            dnc_list_id (``str``): DNC list id to add contact(s) to
        Returns:
            results (``dict``): Dict of phone numbers and DNC lists added to
            errors (``list``): List of errors and failures
        """
        if not isinstance(phone_numbers, list):
            raise TypeError("add_dnc expects a list of phone numbers. If you intend to add only one number to the "
                            "do-not-contact list, pass a list of length 1")

        url = "{}/v1/dnc_contacts/".format(self.api_domain)
        requests = []
        for number in phone_numbers:
            data = {"dnc": "{}/v1/dnc_lists/{}/".format(self.api_domain, dnc_list_id), 'phone_number': number}
            requests.append({"func": self.session.post,
                             "func_params": {"url": url, "data":data},
                             "expected_status": 201})

        responses, errors = self._handle_requests(requests, retry=True)
        dnc_records = [request.json() for request in responses]
        results = self.pretty_format_dnc_data(dnc_records)
        return results, errors
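A hypothetical call site, assuming client is an instance of the class above (numbers and list id are illustrative):

results, errors = client.add_dnc(["+15551234567", "+15559876543"], dnc_list_id="42")
if errors:
    print("Some numbers could not be added:", errors)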
Example #6
	def Execute(uuid,servers,parameters):
		# TODO - async option
		clc.v2.SetCredentials(bpformation.CONTROL_USER, bpformation.CONTROL_PASSWORD)
		requests = []
		start_time = time.time()
		for server in servers:
			bpformation.output.Status('SUCCESS',3,"Execution request submitted for %s" % server.lower())
			if parameters:  parameters = dict( (p.split("=")[0],p.split("=",1)[1]) for p in parameters )
			else:  parameters=None
			requests.append(clc.v2.Server(server,alias=bpformation.web.Alias()).ExecutePackage(
					package_id=re.sub("[^a-zA-Z0-9]","",uuid).lower(),   # Wants uuid w/o dashes
					parameters=parameters,
				))

		requests = sum(requests)
		requests.WaitUntilComplete()
		if len(requests.success_requests):
			success_servers = [ o.data['context_val'] for o in requests.success_requests ]
			bpformation.output.Status('SUCCESS',3,"Execution completed on %s (%s seconds)" % (", ".join(success_servers),int(time.time()-start_time)))
		for request in requests.error_requests:
			(req_loc,req_id) = request.id.split("-",1)
			r = bpformation.web.CallScrape("GET","/Blueprints/Queue/RequestDetails/%s?location=%s" % (req_id,req_loc))
			if r.status_code<300 and r.status_code>=200:
				error = re.search('<div class="module-body">.*?<pre>(\s*.Error.\s*)?(.*?)\s*</pre>',r.text,re.DOTALL).group(2)
				bpformation.output.Status('ERROR',3,"Execution failed on %s: %s" % (request.data['context_val'],error))
			else:
				bpformation.output.Status('ERROR',3,"Execution failed on %s request ID %s (https://control.ctl.io/Blueprints/Queue/RequestDetails/%s?location=%s)" % \
						(request.data['context_val'],req_id,req_id,req_loc))
Example #7
def goods_info(request, goods_id):
    api = get_api()
    owner = api.get_user(nickname=request.user.username)
    nickname = owner.nickname

    user = api.get_user(nickname=nickname)
    company = api.get_company(company_id=user.company_id)
    is_administrator = False
    if company:
        is_administrator = api.is_administrator(owner.id, company.id)

    check = nickname == request.user.username
    show = not check or company is not None
    is_my_company = True

    goods = api.get_goods(goods_id)
    date = goods.get_date()

    body_type = _get_body_type(goods.body_type)
    download_type = DOWNLOAD_TYPE[int(goods.download_type)]
    goods_info = [
        body_type + ", " + download_type,
        str(goods.weigh) + " t, " + str(goods.volume) +
        " m³, number of straps: " + str(goods.belt_count),
        "Number of vehicles: " + str(goods.car_count)
    ]

    price = [
        str(goods.rate) + " " + PRICES[int(goods.price)],
        FORM_PRICES[int(goods.form_price)]
    ]

    requests = []
    all_request = api.get_requests(goods_id=goods.id)
    for _request in all_request:
        user_request = api.get_user(_request.user_id)
        avatar = _get_avatar(user_request.avatar)

        date_create = _request.date_create.strftime("%d.%m.%Y, %H:%M")
        request_date = None
        if _request.date:
            request_date = _request.date.strftime("%d.%m.%Y, %H:%M")

        requests.append(
            (_request, user_request, avatar, date_create, request_date))

    return render(
        request, 'trans/goods_info.html', {
            'company': company,
            'is_my_company': is_my_company,
            'requests': requests,
            'goods': goods,
            'date': date,
            'goods_info': goods_info,
            'prices': price,
            'is_administrator': is_administrator,
            'show': show,
            'nickname': user.nickname,
            'my_company': is_add_car(user)
        })
Example #8
	def requests(self, reqs):
		requests = E.requests()
		postdata = {}
		for req in reqs:
			requests.append(req.tree)
			postdata.update(req.form)

		tree = (
			E.client(
				requests
			)
		)

		xml = etree.tostring(tree, encoding='UTF-8', xml_declaration=True) # declaration uses ' instead of "

		globaldata = {
			'client_version': config.client_version,
			'xml': xml
		}

		postdata.update(globaldata)

		r = self.s.post('https://client.whatpulse.org/v1.1/', verify=pkg_resources.resource_filename(__name__, 'whatpulse.pem'), data=postdata)
		print(r.text)
		tree = etree.fromstring(r.text, self.parser)
		ress = tree.xpath('/server/responses/response')

		ret = []
		for res in ress:
			ret.append(Response.parse(res))

		return ret
Example #9
    def getDNS(self):
        requests = []
        for reqObj in self.json["dns"]:
            respObj: cuckooReportDNSRequest = cuckooReportDNSRequest(reqObj)
            requests.append(respObj)

        return requests
Example #10
    def getHTTP(self):
        requests = []
        for reqObj in self.json["http"]:
            respObj: cuckooReportHTTPRequest = cuckooReportHTTPRequest(reqObj)
            requests.append(respObj)

        return requests
Example #11
    def getICMP(self):
        requests = []
        for reqObj in self.json["icmp"]:
            respObj: cuckooReportICMP = cuckooReportICMP(reqObj)
            requests.append(respObj)

        return requests
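Examples #9 through #11 follow the same wrap-each-entry pattern; each getter reduces to a one-line comprehension, shown here for the ICMP case:

    def getICMP(self):
        return [cuckooReportICMP(reqObj) for reqObj in self.json["icmp"]]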
Example #12
def queryOneWayTrip(origins, destinations, startDate, startDateRange, alliance):
    requests = []
    results = []
    maxStops = 1
    maxConnectionDuration = 300
    if not (alliance in ["STAR", "ONEWORLD", "SKYTEAM"]) :
        alliance = ""
    for i in range(startDateRange):
        start = startDate + timedelta(days = i)
        for origin in origins:
            for destination in destinations:
                print start, origin, destination
                request = generateRequestJSON(origin, destination, maxStops, [start], alliance, maxConnectionDuration)
                # pprint(request)
                requests.append(request)
                result = sendRequest(request)
                result['startDate'] = start.strftime("%Y-%m-%d")
                result['returnDate'] = ""
                result['origin'] = origin
                result['destination'] = destination
                results.append(result)

    saveToFile(results, '/tmp/pyFare_oneway_trip_results_' + origins[0] + '_' + time.strftime("%Y%m%d") + "_" + str(int(time.time())) + '.json')
    # Process raw data
    processResultJson(results, 1)
Example #13
	def requests(self, reqs):
		requests = E.requests()
		postdata = {}
		for req in reqs:
			requests.append(req.tree)
			postdata.update(req.form)

		tree = (
			E.client(
				requests
			)
		)

		xml = etree.tostring(tree, encoding='UTF-8', xml_declaration=True) # declaration uses ' instead of "

		globaldata = {
			'client_version': config.client_version,
			'xml': xml
		}

		postdata.update(globaldata)

		r = self.s.post('https://client.whatpulse.org/v1.1/', verify='whatpulse.pem', data=postdata)
		print(r.text)
		tree = etree.fromstring(r.text, self.parser)
		ress = tree.xpath('/server/responses/response')

		ret = []
		for res in ress:
			ret.append(Response.parse(res))

		return ret
Example #14
    def parse(self, response):
        print "====start %s==" % response.url
        # print response.body
        time.sleep(random.randint(self.time_interval, 2))

        # test the status of hbase and thrift server
        if self.test_hbase:
            try:
                self.htable = HBaseTest(host=self.tool.HOST_HBASE1, table="origin")
                self.htable.close_trans()
                self.test_hbase = False
            except:
                raise CloseSpider("no thrift or hbase server!")

        # the query was not fetched successfully
        if response.url == self.domain_url:
            print "error of query"
            return

        # extract and parse the news page content
        items = self.parse_items(response)
        # build an XPath Selector object to extract page elements
        sel = Selector(response)

        requests = []
        for url in sel.xpath(u'//a[@class="np"]/@href').extract():
            requests.append(self.make_requests_from_url(self.domain_url + url))

        for item in items:
            yield Request(url=item["url"], meta={"item": item}, callback=self.parse_content)

        for request in requests:
            continue  # NOTE: this 'continue' makes the yield below unreachable (pagination left disabled)
            yield request
Example #15
    def get_interface_requests(self, configs, have):
        requests = []
        if not configs:
            return requests

        # Create URL and payload
        for conf in configs:
            name = conf["name"]
            if self.delete_flag and name.startswith('Loopback'):
                method = DELETE
                url = 'data/openconfig-interfaces:interfaces/interface=%s' % quote(
                    name, safe='')
                request = {"path": url, "method": method}
            else:
                # Create Loopback in case not available in have
                if name.startswith('Loopback'):
                    have_conf = next(
                        (cfg for cfg in have if cfg['name'] == name), None)
                    if not have_conf:
                        loopback_create_request = build_interfaces_create_request(
                            name)
                        requests.append(loopback_create_request)
                method = PATCH
                url = 'data/openconfig-interfaces:interfaces/interface=%s/config' % quote(
                    name, safe='')
                payload = self.build_create_payload(conf)
                request = {"path": url, "method": method, "data": payload}
            requests.append(request)

        return requests
Example #16
    def del_entities(self, entity):
        # return if fields are empty
        if self.domain_prefix.get() == "" or self.token.get() == "":
            self.text.insert(END, "ERROR - Fill required details" + "\n")
            self.text.see(END)
            self.text.update()
            return

        endpoint = self.generate_endpoint(entity)

        if entity == "GiftCards":
            self.void_gift_cards(entity, endpoint)
            return False

        result = ""

        print self.domain_prefix.get() + '-' + entity + '.csv'

        try:
            with open(self.domain_prefix.get() + '-' + entity + '.csv',
                      'rb') as vcsv:
                reader = csv.DictReader(vcsv)
                headers = self.setup_headers()
                requests = []

                print self.number_of_rows(entity)

                for index, row in enumerate(reader):
                    requests.append(
                        grequests.delete(endpoint %
                                         (self.domain_prefix.get(), row['id']),
                                         headers=headers,
                                         stream=False))

                # batch requests into groups of 300
                # on any batch that has any response of 429, wait N seconds (determined by retry_time_handler)
                # continue until another 429, or no more requests
                # at first, let's just stop when we hit a 429 ( exit(0) )

                self.send_requests(requests)

        except IndexError as e:
            print "index error"
            print e
        except LookupError as e:
            print "lookup error"
            print e
        except:
            print sys.exc_info()
            response = []
            print result

            self.text.insert(
                END,
                "ERROR - couldn't open the CSV perhaps, check the terminal" +
                "\n")
            self.text.see(END)
            self.text.update()

        return False
Example #17
    def getRecommendationRequests(self):
        requests = []
        for request in self.chooseRequests:
            if issubclass(request, BxRecommendationRequest):
                requests.append(request)

        return requests
Example #18
def get_requests(filter_type="movies", priority_only=False):
    """
    :param filter_type: movies or episodes
    :param priority_only: filter requests without priority
    """
    with sqlite3.connect(REQUESTS_DB) as conn:
        result = conn.execute("select * from requests where used=0").fetchall()
        requests = []
        for i in result:
            is_episode_ = is_episode(i[1])

            if filter_type == "movies" and is_episode_:
                continue

            if filter_type == "episodes" and not is_episode_:
                continue

            requests.append({
                "user": i[0],
                "comment": i[1],
                "type": i[2],
                "movie": i[3],
                "content": i[4].split("|"),
                "id": i[5],
                "verified": i[7],
                "priority": i[8],
            })

        random.shuffle(requests)

        if priority_only:
            return [request for request in requests if request.get("priority")]

        return requests
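A usage sketch for the two filters:

priority_movies = get_requests(filter_type="movies", priority_only=True)
all_episodes = get_requests(filter_type="episodes")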
Example #19
def scrape_many(pages, delay=delay):
    session = FuturesSession()
    requests = []
    for page in pages:
        requests.append(session.get(page))
        time.sleep(delay)
    return (requests)
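Each element returned by scrape_many is a Future from requests_futures; blocking on .result() yields ordinary requests.Response objects. A usage sketch with illustrative URLs:

futures = scrape_many(["https://example.com/a", "https://example.com/b"], delay=0.5)
pages = [f.result() for f in futures]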
Example #20
File: main.py Project: syfun/gqlcli
def export_postman(ctx, name: str, header):
    """Export all client query to postman."""
    schema = ctx.obj['schema']
    headers = make_headers(header)
    requests = []
    query_type, mutation_type = schema.query_type, schema.mutation_type
    if query_type:
        for op, field in query_type.fields.items():
            requests.append(
                make_postman_request(op, build_client(field, op, 'query'),
                                     headers))
    if mutation_type:
        for op, field in mutation_type.fields.items():
            requests.append(
                make_postman_request(op, build_client(field, op, 'mutation'),
                                     headers))

    data = {
        'info': {
            'name':
            name,
            'schema':
            'https://schema.getpostman.com/json/collection/v2.1.0/collection.json',
        },
        'item': requests,
        'protocolProfileBehavior': {},
    }
    with open(f'{name}.json', 'w') as f:
        json.dump(data, f, indent=2)
Example #21
def create_sheet():
    service = build('sheets', 'v4', credentials=creds)
    all_sheet_data = service.spreadsheets().get(
        spreadsheetId=spreadsheet_id).execute()

    last_month_title = all_sheet_data["sheets"][-1]["properties"]["title"]
    past_month_income_range = "{}!G3".format(last_month_title)
    num_indices = len(all_sheet_data["sheets"])

    requests = []
    requests.append({
        "duplicateSheet": {
            "sourceSheetId": 2042501166,
            "insertSheetIndex": num_indices,
            "newSheetName": month_year
        }
    })
    body = {'requests': requests}

    # duplicate the "default" sheet so that a sheet for the new month is created
    response = service.spreadsheets().batchUpdate(spreadsheetId=spreadsheet_id,
                                                  body=body).execute()

    # get the income from last month, store in body variable to pass into the new month's sheet later
    response2 = service.spreadsheets().values().get(
        spreadsheetId=spreadsheet_id, range=past_month_income_range).execute()
    last_month_income = [[response2["values"][0][0]]]
    last_month_income_body = {"values": last_month_income}

    # put last month's income into the "PREV_MONTH_INCOME" for the new month
    response3 = service.spreadsheets().values().update(
        spreadsheetId=spreadsheet_id,
        range=PREV_MONTH_INCOME_range,
        body=last_month_income_body,
        valueInputOption="USER_ENTERED").execute()
Example #22
def scan_and_push(url, endpoint, token, path):
    requests = []
    for dirpath, _, _ in os.walk(path):
        if os.path.exists(join(dirpath, CONFIG_NAME)):
            task_name, submission_id = do_path(url, endpoint, token, dirpath)
            requests.append((dirpath, task_name, submission_id))
    return requests
Example #23
    def template(self, mapping: dict, slide_ids: list = []) -> None:
        """Replaces all text encaspulated with `{{ <TEXT> }}` with input.

        :param mapping: Dictionary mapping old text to new text
        :type mapping: dict
        :param slide_ids: The slides to apply template on. If none, then all slides
            will be considered.
        :type slide_ids: list, optional
        """

        requests = []
        for key, val in mapping.items():
            json = {
                "replaceAllText": {
                    "replaceText": val,
                    "pageObjectIds": slide_ids,
                    "containsText": {
                        "text": f"{{{{ {key} }}}}",
                        "matchCase": False
                    },
                }
            }
            requests.append(json)
        service: Any = creds.slide_service
        logger.info("Templating data")
        service.presentations().batchUpdate(
            presentationId=self.presentation_id,
            body={
                "requests": requests
            },
        ).execute()
        logger.info("Data successfully templated")
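A hypothetical call, assuming deck is an instance of the class above; per the docstring, leaving slide_ids at its default applies the replacement to all slides:

deck.template({"name": "Q3 Review", "date": "2021-10-01"})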
Example #24
    def start_requests(self):
        requests = []

        for school in self.get_schools():
            # Construct an Item for each school and start with
            # requesting each school's overview pages.
            request = Request(school['schoolvo_detail_url'])

            item = {}
            for field, value in settings.get('SCHOOLVO_FIELD_MAPPING')\
                                    .iteritems():
                schoolvo_value = school.get(value, None)

                # If there is a string value in the SchoolVO data, it is
                # probably entered by a human, so strip trailing/leading
                # whitespace
                if schoolvo_value and type(schoolvo_value) == unicode:
                    schoolvo_value = schoolvo_value.strip()
                item[field] = schoolvo_value

            request.meta['item'] = SchoolVOItem(item)

            if school['pad_logo'] and school['pad_logo'].startswith('/'):
                request.meta['item']['logo_img_url'] = '%s%s'\
                    % (settings['SCHOOLVO_URL'], school['pad_logo'][1:])

            if school['pad_gebouw'] and school['pad_gebouw'].startswith('/'):
                request.meta['item']['building_img_url'] = '%s%s'\
                    % (settings['SCHOOLVO_URL'], school['pad_gebouw'][1:])

            requests.append(request)

        return requests
Example #25
def myProfile():
    logged_user = getUserInfo(login_session['user_id'])
    friend_ids = session.query(Association).filter_by(user_id=logged_user.id,
                                                      confirmed=1).all()
    friendList = []
    for f_id in friend_ids:
        friendList.append(
            session.query(User).filter_by(id=f_id.friend_id).one())
    bdays = session.query(Event).filter_by(type='birthday',
                                           user_id=logged_user.id)
    birthdays = []
    for bday in bdays:
        photos = session.query(Media).filter_by(event_id=bday.id,
                                                type='photo').all()
        birthdays.append({
            'id': bday.id,
            'year': bday.year,
            'description': bday.description,
            'photos': photos
        })

    friend_requests = session.query(Association).filter_by(
        friend_id=logged_user.id, confirmed=0).all()
    requests = []
    for fr in friend_requests:
        requests.append(session.query(User).filter_by(id=fr.user_id).one())

    return render_template("myprofile.html",
                           logged_user=logged_user,
                           friendList=friendList,
                           birthdays=birthdays,
                           requests=requests)
Example #26
def do_push(sliver_hosts, portnum, payload):
    """
    Push a payload to a list of slivers.
    NOTE: this has to be done in one go, since we can't import grequests
    into the global namespace (without wreaking havoc on the credential server),
    but it has to stick around for the push to work.
    """

    global TESTING, CONFIG

    from gevent import monkey

    if TESTING:
        monkey.patch_all()

    else:
        # make gevents runnable from multiple threads (or Django will complain)
        monkey.patch_all(socket=True,
                         dns=True,
                         time=True,
                         select=True,
                         thread=False,
                         os=True,
                         ssl=True,
                         httplib=False,
                         aggressive=True)

    import grequests

    # fan-out
    requests = []
    for sh in sliver_hosts:
        rs = grequests.post("http://" + sh + ":" + str(portnum),
                            data={"observer_message": payload},
                            timeout=getattr(CONFIG,
                                            "SYNDICATE_HTTP_PUSH_TIMEOUT", 60))
        requests.append(rs)

    # fan-in
    responses = grequests.map(requests)

    assert len(responses) == len(
        requests), "grequests error: len(responses) != len(requests)"

    for i in xrange(0, len(requests)):
        resp = responses[i]
        req = requests[i]

        if resp is None:
            logger.error("Failed to connect to %s" % (req.url))
            continue

        # verify they all worked
        if resp.status_code != 200:
            logger.error("Failed to POST to %s, status code = %s" %
                         (resp.url, resp.status_code))
            continue

    return True
Example #27
    def match_spectra_from_path(
        self,
        mgf_path: str,
        n_best: int,
        include_metadata: List[str] = None,
        ion_mode: str = "positive",
    ) -> List[pd.DataFrame]:
        """
        Finds the N best matches for spectra in a local mgf file using spec2vec algorithm.

        Parameters
        ----------
        mgf_path: str
            Local path to mgf file
        n_best: int
            Number of best matches to select
        include_metadata: List[str]
            Metadata keys to include in the response. Will make response slower. Please
            check the documentation for a list of valid keys.
        ion_mode: str
            Selects which model will be used for the predictions: Either a model trained with
            positive or negative ion mode spectra data. Defaults to positive.

        Returns
        -------
        A list of pandas dataframes containing the best matches and optionally metadata
        for these matches.

        """
        # validates input
        if ion_mode not in ["positive", "negative"]:
            raise ValueError(
                "Parameter ion_mode should be set to either 'positive' or 'negative'. Defaults to 'positive'."
            )

        parameters = self._build_parameters(n_best, include_metadata)
        # loads spectra
        spectra_generator = load_from_mgf(mgf_path)

        # defines endpoint based on user choice of spectra ion mode
        endpoint = self._PREDICT_ENDPOINT_BASE.format(ion_mode=ion_mode)

        # issue requests respecting the spectra limit per request
        batch = []
        requests = []
        for spectrum in spectra_generator:
            batch.append(spectrum)
            if len(batch) == SPECTRA_LIMIT_PER_REQUEST:
                payload = self._build_payload(batch, parameters)
                requests.append(self._send_request(payload, endpoint))
                batch = []
        if batch:
            payload = self._build_payload(batch, parameters)
            requests.append(self._send_request(payload, endpoint))

        predictions = []
        for r in requests:
            predictions.extend(self._format_results(r))
        return predictions
Example #28
def build_wrk_requests(segmentlist, method='GET'):
    requests = []
    for url in segmentlist:
        parsed_url = urlparse(url)
        requests.append(method + " " + parsed_url.path + " HTTP/1.1\r\n" +
                        "Host: " + parsed_url.netloc + "\r\n" +
                        "Connection: Close\r\n\r\n")
    return requests
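For a single segment URL the function emits one raw HTTP/1.1 request string in the format wrk scripts consume (host and path are illustrative):

from urllib.parse import urlparse  # Python 3 location of urlparse

print(build_wrk_requests(["http://cdn.example.com/seg/1.ts"])[0])
# GET /seg/1.ts HTTP/1.1
# Host: cdn.example.com
# Connection: Close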
Example #29
def main(wordlist):
    global pool
    global finished
    global tasks

    print(f'{fc.cyan}[+]{fc.end} Checking [{fc.orange}{url}{fc.end}] ...')
    if not verify_vuln():
        print(f'{fc.red}[+]{fc.end} Shutting down.')
        sys.exit(0)

    # latin1 supports most common wordlists (including rockyou.txt)
    with open(wordlist, 'r', encoding='latin1') as inp:
        allpasses = inp.readlines()

    allpasses = [p.strip('\n') for p in allpasses]  # strip newlines

    total = len(allpasses)

    # xmlrpc allows for multiple passwords at once, so split the list into small lists
    passlist = []
    for i in range(0, total, stride):
        passlist.append(allpasses[i:i + stride])

    # total requests that have to be made
    tasks = len(passlist)

    print(f'{fc.cyan}[+]{fc.end} Loaded [{fc.cyan}{total}{fc.end}] passwords')
    print(f'{fc.green}[+]{fc.end} Configuration:')
    print(f'      - [{fc.green}user{fc.end}     ] {fc.orange}{user}{fc.end}')
    print(f'      - [{fc.green}wp-url{fc.end}   ] {fc.orange}{url}{fc.end}')
    print(
        f'      - [{fc.green}wordlist{fc.end} ] {fc.orange}{wordlist}{fc.end}')
    print(f'      - [{fc.green}pass/req{fc.end} ] {fc.orange}{stride}{fc.end}')
    print(f'      - [{fc.green}requests{fc.end} ] {fc.orange}{tasks}{fc.end} ')
    print(
        f'      - [{fc.green}processes{fc.end}] {fc.orange}{processes}{fc.end}'
    )
    print(f'{fc.cyan}[+]{fc.end} Start brute forcing...')

    finished = 0

    try:
        with Pool(processes=processes) as pool:
            requests = []
            for passwords in passlist:
                requests.append(
                    pool.apply_async(send_passwords,
                                     args=(passwords, ),
                                     callback=callback))

            pool.close()  # accept no more jobs
            pool.join()  # wait for all jobs to finish
    except:
        print()
        print(f'{fc.red}Aborted{fc.end}')
        sys.exit(0)
    print()
    print(f'{fc.cyan}[+]{fc.end} Finished')
Example #30
    def resources(k8s_object, k8s_object_list, headers, v, ns, l, logger):
        data, config_not_defined, limits, requests, both = [], [], [], [], []
        for item in k8s_object_list.items:
            k8s_object_name = item.metadata.name
            if 'pods' in k8s_object:
                containers = item.spec.containers
            else:
                containers = item.spec.template.spec.containers
            for container in containers:
                if container.resources.limits is not None and \
                container.resources.requests is not None:
                    data.append([item.metadata.namespace, k8s_object_name, \
                    container.name, u'\u2714', u'\u2714'])
                    both.append(True)
                elif container.resources.limits is None and \
                container.resources.requests is not None:
                    data.append([item.metadata.namespace, k8s_object_name, \
                    container.name, u'\u2717', u'\u2714'])
                    requests.append(True)
                elif container.resources.limits is not None and \
                container.resources.requests is None:
                    data.append([item.metadata.namespace, k8s_object_name, \
                    container.name, u'\u2714', u'\u2717'])
                    limits.append(True)
                else:
                    data.append([item.metadata.namespace, k8s_object_name, \
                    container.name, u'\u2717', u'\u2717'])
                    config_not_defined.append(False)
        logger.info ("resource definition: {} {}".format(len(k8s_object_list.items), \
        k8s_object))
        data_no_resources = Output.bar(config_not_defined,data, \
        "containers found without resources defined in running", \
        k8s_object, Output.RED, l, logger)
        data_requests = Output.bar(requests,data, \
        "containers found with only requests defined in running",
        k8s_object, Output.YELLOW, l, logger)
        data_limits = Output.bar(limits,data, \
        "containers found with only limits defined in running", \
        k8s_object, Output.YELLOW, l, logger)
        data_all = Output.bar(both,data, \
        "containers found with both limits and requests defined in running", \
        k8s_object, Output.GREEN, l, logger)
        Output.print_table(data, headers, v, l)
        Output.csv_out(data, headers, k8s_object, 'resource_definition', ns)

        # creating analysis data for logging
        analysis = {
            "container_property": "resources",
            "total_container_count": len(data),
            "no_resources_defined_containers": data_no_resources,
            "only_limits_defined_containers": data_limits,
            "only_requests_defined_containers": data_requests,
            "all_resources_defined_containers": data_all
        }
        json_data = Output.json_out(data, analysis, headers, k8s_object,
                                    'resource_definition', ns)

        return json_data
Example #31
def requests_for_user(user):
    requests = []
    user_items = user.item_set.all()
    for item in user_items:
        for i in item.requestitem_set.filter(status="requested"):
            requests.append(i)
    requests_count = len(requests)

    return requests, requests_count
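The nested loops issue one query per item; a single-query sketch of the same lookup, assuming RequestItem is the model behind requestitem_set and Item carries the user foreign key implied by item_set:

def requests_for_user(user):
    requests = list(
        RequestItem.objects.filter(item__user=user, status="requested"))
    return requests, len(requests)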
Example #33
 def sign_pki_csr(self, filename, selector):
     requests = []
     for request in pem.parse_file(filename):
         content = "".join(request.as_text().splitlines()[1:-1])
         resp = self.post(amt.wsman.sign_pki_csr(self.path, content, "InstanceID", selector))
         rv = _return_value(resp, AMT_PublicKeyManagementService)
         signed_request = _find_node(resp, AMT_PublicKeyManagementService, "SignedCertificateRequest")
         requests.append((rv, None if signed_request is None else signed_request.text))
     return requests
Example #34
def get_citation_requests(submission):
    # Remove MoreComments objects from comment forest
    submission.comments.replace_more(limit=0)
    requests = []
    # Iterate through comment forest
    for comment in submission.comments.list():
        # Search for 'citation needed' in body of comment
        if re.search('citation needed', comment.body, re.IGNORECASE):
            requests.append(comment)
    return requests
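A usage sketch with PRAW, assuming an authenticated praw.Reddit instance named reddit (the submission id is illustrative):

submission = reddit.submission(id="abc123")
for comment in get_citation_requests(submission):
    print(comment.permalink)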
Example #35
def curr_month_futers(type_dataset, product, start_year):
    ''' Build the list of all future requests by year and month '''
    requests = []
    year = np.arange(start_year, now.year+1, 1)
    month = np.array(['F', 'G', 'H', 'J', 'K', 'M', 'N','Q', 'U', 'V', 'X', 'Z'])
    for y in year:
        for m in month:
            s = type_dataset + product + m + str(y)
            requests.append(s)
    return requests
Example #36
        def request_callback(method, uri, headers):
            if uri.startswith(BUGZILLA_BUGLIST_URL):
                body = bodies_csv.pop(0)
            elif uri.startswith(BUGZILLA_BUG_URL):
                body = bodies_xml.pop(0)
            else:
                body = bodies_html[len(requests) % 2]

            requests.append(httpretty.last_request())

            return (200, headers, body)
Example #37
def view(pcap_id):

    pending_tasks = memcache.get(str(pcap_id) + "_tasks")
    total_tasks = memcache.get(str(pcap_id) + "_total_tasks")

    if pending_tasks is not None:
        print "There are %s pending tasks" % pending_tasks

    if total_tasks is not None:
        print "There are %s tasks" % total_tasks

    pid = {"_id.pcap_id": ObjectId(pcap_id)}

    # FIXME: this map/reduce is executed each time view is requested
    map = Code(
        "function () {"
        "  emit({ pcap_id : this['pcap_id'], UA : this.UA, 'user-agent' : this['user-agent']}, {malicious: this.tags.malicious, clean: this.tags.clean, suspicious:this.tags.suspicious});"
        "}"
    )

    reduce = Code(
        "function (key, vals) {"
        "  var result = {malicious:0, suspicious:0, clean:0 };"
        "  vals.forEach(function (value) {result.malicious += value.malicious; result.clean += value.clean; result.suspicious += value.suspicious; });"
        "  return result;"
        "}"
    )

    results = db.analysis.map_reduce(map, reduce, "malicious")

    found = results.find(pid)
    requests = []

    for i in found:
        # print i
        requests.append(i)

    original_request = db.requests.find_one({"pcap_id": ObjectId(pcap_id)})

    original_ua = ""

    try:
        if original_request:
            original_ua = original_request["headers"]["user-agent"]
    except KeyError:
        pass

    return render_template(
        "view.html",
        requests=requests,
        original_ua=original_ua,
        pending_tasks=int(pending_tasks),
        total_tasks=int(total_tasks),
    )
Example #38
 def batch(self, calls):
     """
     Executes a batch request (with same method) but different parameters
     against a JSON-RPC interface
     """
     requests = []
     for call in calls:
         request = {"jsonrpc": "2.0",
                    "method": call['method'],
                    "params": call['params'],
                    "id": call['id']}
         requests.append(request)
     responseJSON = self._execute(requests)
     return responseJSON
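The shape batch expects for calls, per the loop above (method names are illustrative, and rpc stands in for an instance of the class):

calls = [
    {"method": "getblockcount", "params": [], "id": 1},
    {"method": "getblockhash", "params": [0], "id": 2},
]
responses = rpc.batch(calls)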
Example #39
def parse_test_file(file):
    requests = []
    with open(file) as f:
        request = None
        for line in f.readlines():
            if line.startswith('GET http://') or line.startswith('POST http://'):
                if request is not None:
                    if request.startswith('GET'):
                        request = request.strip() + '\r\n\r\n'
                    requests.append(request)
                request = line.strip() + '\r\n'
            else:
                request += line.strip() + '\r\n'
        # flush the final request; the loop only appends one on seeing the next request line
        if request is not None:
            if request.startswith('GET'):
                request = request.strip() + '\r\n\r\n'
            requests.append(request)
    return requests
Example #40
def decrypt_other_byte(ints, byte_idx):
    retval = [None]
    pool = Pool(10)
    requests = []
    jobs = []
    for i in range(256):
        copy_ints = ints[:]
        copy_ints[byte_idx] = i
        params = {'auth': hexlify(''.join([chr(x) for x in copy_ints]))}
        request = AsyncRequest('GET', BASE_URL, params=params, session=session, callback=create_hook(pool, i, retval))
        requests.append(request)
        jobs.append(pool.spawn(request.send))
        if retval[0] is not None:
            break
    gevent.joinall(jobs)
    return retval[0]
Example #41
def _load_requests(dump):
    decoded = json.loads(dump)
    requests = []
    for d in decoded:
        try:
            capture_dts = _strptime(d['capture_dts'])
        except:
            capture_dts = None
        target_ssid = d['target_ssid']
        if target_ssid:
            target_ssid = repr(target_ssid)[2:-1]
        request = ProbeRequest(d['source_mac'], capture_dts,
                               target_ssid=target_ssid,
                               signal_strength=d['signal_strength'])
        requests.append(request)
    return requests
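An illustrative dump matching the keys the loader reads; the timestamp string must be in whatever format the module's _strptime expects:

import json

dump = json.dumps([{
    "source_mac": "aa:bb:cc:dd:ee:ff",
    "capture_dts": "2016-01-01 12:00:00",
    "target_ssid": "coffee-shop",
    "signal_strength": -60,
}])
probe_requests = _load_requests(dump)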
Example #42
def do_push( sliver_hosts, portnum, payload ):
    """
    Push a payload to a list of slivers.
    NOTE: this has to be done in one go, since we can't import grequests
    into the global namespace (without wreaking havoc on the credential server),
    but it has to stick around for the push to work.
    """
    
    global TESTING, CONFIG
    
    from gevent import monkey
    
    if TESTING:
       monkey.patch_all()
    
    else:
       # make gevents runnable from multiple threads (or Django will complain)
       monkey.patch_all(socket=True, dns=True, time=True, select=True, thread=False, os=True, ssl=True, httplib=False, aggressive=True)
    
    import grequests
    
    # fan-out 
    requests = []
    for sh in sliver_hosts:
      rs = grequests.post( "http://" + sh + ":" + str(portnum), data={"observer_message": payload}, timeout=getattr(CONFIG, "SYNDICATE_HTTP_PUSH_TIMEOUT", 60) )
      requests.append( rs )
      
    # fan-in
    responses = grequests.map( requests )
    
    assert len(responses) == len(requests), "grequests error: len(responses) != len(requests)"
    
    for i in xrange(0,len(requests)):
       resp = responses[i]
       req = requests[i]
       
       if resp is None:
          logger.error("Failed to connect to %s" % (req.url))
          continue 
       
       # verify they all worked 
       if resp.status_code != 200:
          logger.error("Failed to POST to %s, status code = %s" % (resp.url, resp.status_code))
          continue
          
    return True
Example #43
 def start_requests(self):
     url = 'http://www.ziguan123.com/ajax/productrank'
     requests = []
     formdata = {
         "operator_type": "27",
         "policyfirstlevel": "89,90,91,92,93",
         "policy": "0",
         "fundtype": "",
         "terms": "1",
         "sermonth": "2016-08",
         "sort_name": "Month1",
         "sort_type": "desc",
         "page_index": "1",
         "page_size": "40"
     }
     request = FormRequest(url, callback=self.parse, formdata=formdata)
     requests.append(request)
     return requests
Example #44
def handle_agencylist(response):
    logger.info(response.url)
    agencies_tree = etree.fromstring(response.content)
    has_error = handle_error(agencies_tree, response.url, handle_agencylist)
    if has_error:
        return False
    save_response(response, os.path.join(FILE_ROOT, "agency_list.xml"))
    agency_ids = agencies_tree.xpath('//agency/@tag')
    requests = []
    for agency_id in agency_ids:
        url = NEXTBUS_URL + urlencode({
            "command": "routeList",
            "a": agency_id,
        })
        hooks = {
            "response": get_routelist_handler(agency_id),
        }
        requests.append(async.get(url, hooks=hooks))
    async.map(requests)
Example #45
def main():
    urls = {}
    requests = []
    session = FuturesSession(max_workers=10)
    for year in YEARS_TO_PARSE:
        landing_page = SEARCH_LANDING + '&year=' + str(year)
        landing_res = session.get(landing_page).result()
        landing_bs = BS(landing_res.content, 'html5lib')
        number_span = landing_bs.select('li.ep_tag_selected span')[0].text
        number_of_question = int(re.findall(r'\d+', number_span)[0])
        number_of_pages = math.ceil(
            number_of_question / 10)  # change to per page
        for page_num in range(1, number_of_pages + 1):
            res = session.get(landing_page + '&currentPage=' + str(page_num))
            requests.append(res)
        for request in tqdm(requests):
            try:
                request_result = request.result()
            except ConnectionError:
                print(
                    'Due to the ConnectionError page {} hasn\'t been parsed'.format(page_num))
                continue
            page = BS(request_result.content, "html5lib")
            if page:
                for notice in page.select('.results div.notice'):
                    for url in notice.select('ul.documents li a'):
                        title_text = notice.select(
                            'p.title a.result_details_link')[0].text
                        title_date = notice.select(
                            'div.date_reference span.date')[0].text
                        question_format = url.get('href').split('.')[-1]
                        title = '{} ({}).{}'.format(
                            title_text, title_date, question_format)
                        title = re.sub(r'[\n\r\t]', '', title)
                        title = title.replace('/', '-')
                        urls[url.get('href')] = title
            else:
                break
    if not os.path.exists(FOLDER_TO_DOWNLOAD):
        os.mkdir(FOLDER_TO_DOWNLOAD)
    download(urls, FOLDER_TO_DOWNLOAD)
Example #46
def decrypt_rightmost_byte(ints, byte_idx):
    pool = Pool(10)
    requests = []
    jobs = []
    for i in range(256):
        copy_ints = ints[:]
        copy_ints[byte_idx] = i
        params = {'auth': hexlify(''.join([chr(x) for x in copy_ints]))}
        request = AsyncRequest('GET', BASE_URL, params=params, session=session)
        requests.append(request)
        jobs.append(pool.spawn(request.send))
    gevent.joinall(jobs)
    found_i = []
    for i in range(256):
        request = requests[i]
        if request.response.content != 'padding error':
            found_i.append(i)
    if len(found_i) == 1:
        return found_i[0]
    else:
        raise ValueError("more than one byte resulted in good padding, need to change second byte to find which results in last byte 1")
Example #47
def get_registered_users(user_ids):
    """Filter users that exist in Stackalytics (and therefore in Launchpad).

    :param user_ids: list of user_id items
    :returns: list of user_ids which are registered in Launchpad/Stackalytics.
    """
    session = requests_futures.sessions.FuturesSession(max_workers=10)
    requests = list()
    for user in user_ids:
        req = session.get(STACKALYTICS_URL, params={'user_id': user})
        requests.append(req)

    assert(len(user_ids) == len(requests))
    result = list()
    for user, req in zip(user_ids, requests):
        r = req.result()
        if r.status_code == 200:
            result.append(user)
        else:
            LOG.warning("User_id '%s' is not registered in Launchpad", user)
    return result
Example #48
def queryRoundTrip(origins, destinations, startDate, startDateRange, returnDate, returnDateRange, maxStops, alliance):
    requests = []
    results = []
    maxConnectionDuration = 300
    for i in range(startDateRange):
        for j in range(returnDateRange):
            start = startDate + timedelta(days = i)
            end = returnDate + timedelta(days = j)
            for origin in origins:
                for destination in destinations:
                    print start, end, origin, destination
                    request = generateRequestJSON(origin, destination, maxStops, [start, end], alliance, maxConnectionDuration)
                    requests.append(request)
                    result = sendRequest(request)
                    result['startDate'] = start.strftime("%Y-%m-%d")
                    result['returnDate'] = end.strftime("%Y-%m-%d")
                    result['origin'] = origin
                    result['destination'] = destination
                    results.append(result)

    saveToFile(results, '/tmp/pyFare_round_trip_results_' + '_' + str(int(time.time())) + '.json')
    # Process raw data
    processResultJson(results, 0)
Example #49
 def handle_routelist(response):
     logger.info(response.url)
     dir = os.path.join(FILE_ROOT, agency_id)
     if not os.path.exists(dir):
         os.mkdir(dir)
     routelist_tree = etree.fromstring(response.content)
     has_error = handle_error(routelist_tree, response.url, get_routelist_handler(agency_id))
     if has_error:
         return False
     save_response(response, os.path.join(dir, "route_list.xml"))
     route_ids = routelist_tree.xpath('//route/@tag')
     requests = []
     for route_id in route_ids:
         url = NEXTBUS_URL + urlencode({
             "command": "routeConfig",
             "a": agency_id,
             "r": route_id,
         })
         hooks = {
             "response": get_routeconfig_handler(agency_id, route_id),
         }
         requests.append(async.get(url, hooks=hooks))
     async.map(requests)
Example #50
def view(hash):

    h = { "_id.hash" : hash }

    #requests = db.analysis.find(h)    

    # FIXME: this map/reduce is executed each time view is requested
    map = Code("function () {"
        "  emit({ hash : this['id'], UA : this.UA, 'user-agent' : this['user-agent']}, {malicious: this.tags.malicious, clean: this.tags.clean, suspicious:this.tags.suspicious});"
        "}")

    reduce = Code("function (key, vals) {"
        "  var result = {malicious:0, suspicious:0, clean:0 };"
        "  vals.forEach(function (value) {result.malicious += value.malicious; result.clean += value.clean; result.suspicious += value.suspicious; });"
        "  return result;"
        "}")

    results = db.analysis.map_reduce(map, reduce, 'malicious')

    found = results.find(h)
    requests = []

    for i in found:
        #print i
        requests.append(i)

    original_request = db.requests.find_one({"id": hash})


    original_ua = ''

    if original_request:
        original_ua = original_request['headers']['user-agent']


    return render_template('view.html', requests=requests, original_ua=original_ua)
Example #51
	def requested(self):
		requests = []
		for request in self.updates()['result'].get('friends_response', {}).get('added_friends', []):
			requests.append(request['name'])
		return requests
Example #52
def synchronizeDeftRequests():
# update Production request status (caveat do not process user's requests)
    error = 0
    # connect to Oracle
    (pdb,dbcur,deftDB) = connectDEFT('R')
    t_table_Tasks    = "%s.%s"%(deftDB,deft_conf.daemon['t_production_task'])
    t_table_Requests = "%s.%s"%(deftDB,deft_conf.daemon['t_prodmanager_request'])
    t_table_Request_State = "%s.%s"%(deftDB,deft_conf.daemon['t_prodmanager_request_status'])

    request_update_list = ''
    for r in Request_update_statesL :
        request_update_list += "'%s',"%(r)
    request_update_list = request_update_list[0:(len(request_update_list)-1)] 
    sql = "SELECT taskid,pr_id,chain_tid, status,step_id FROM %s "%(t_table_Tasks)
    sql+= "WHERE status IN (%s) "%(request_update_list)
    sql+= "AND taskid > %s "%(MIN_DEFT_TASK_ID)
    sql+= "AND TIMESTAMP > current_timestamp - %s "%(REQUEST_SYNCH_INTERVAL)
    sql+= "AND project NOT LIKE '%s' "%('user%')
    sql+="ORDER BY TASKID, PR_ID, STEP_ID"
    print sql
    tasksDEFT = DButils.QueryAll(pdb,sql)

    requests      = []
    done_requests = []
    final_requests= []
    sql_update    = []

    if len(tasksDEFT) : 
     # select list of requests
     for t in tasksDEFT :
        task_id = t[0]
        req_id  = t[1]
        try :
            req_id = int(req_id)
        except :
            print "WARNING. Unknown request ID : %s (Task ID : %s)"%(req_id,task_id)
        requests.append(req_id)
     requests.sort()

     rold = -1
     for  r in requests :
      if r != rold : 
             final_requests.append(r)
      rold = r
    else :
        print "INFO. NO new tasks in the last % hours"%(REQUEST_TIME_INTERVAL_HOURS)


    for request in final_requests :
      sql = "SELECT req_s_id, pr_id, status FROM %s WHERE PR_ID=%s "%(t_table_Request_State,request)
      reqDEFT = DButils.QueryAll(pdb,sql)
      for r_s_s in reqDEFT :
        r_step_id = r_s_s[0]
        r_req_id  = r_s_s[1]
        r_status  = r_s_s[2]
        status    = r_status
        print "INFO. Process request : %s, Step : %s Current state : %s"%(r_req_id,r_step_id,r_status)
        # now go through list of tasks and find task with for request and step
        for t in tasksDEFT :
            task_id = t[0]
            req_id  = t[1]
            step_id = t[4]
            if req_id == r_req_id and step_id == r_step_id :
                task_status = t[3].lower()
                if task_status == 'registered' :
                    if r_status == 'approved' or r_status == 'registered' or r_status == 'waiting' :
                        r_status = 'processed'
                if task_status == 'running' :
                    if r_status == 'approved' or r_status == 'registered' or r_status == 'waiting' or r_status == 'processed':
                        r_status = 'executing'
                if  task_status == 'done' :
                    # check was it the last task in chain
                    done_requests.append(task_id)
                if r_status != status :
                    status = r_status
                    # insert new record into t_prodmanager_request_status table
                    sql = "INSERT INTO %s "%(t_table_Request_State)
                    sql+= "(REQ_S_ID,COMMENT,OWNER,STATUS,TIMESTAMP,PR_ID) "
                    sql+= "VALUES(%s,'%s','%s','%s',current_timestamp,%s)"%\
                        (r_step_id,'automatic update','ProdManager',status,request)
                    sql_update.append(sql)
                    print sql
    DButils.closeDB(pdb,dbcur)
    dbupdate = True
    if dbupdate :
            (pdb,dbcur,deftDB) = connectDEFT('W')
            for sql in sql_update :
                if verbose : print sql
                DButils.QueryUpdate(pdb,sql)
            DButils.QueryCommit(pdb)
            DButils.closeDB(pdb,dbcur)
    elif dbupdate == False :
            print "INFO. No database update : dbupdate = %s"%(dbupdate)
Example #53
    def _inventory_lookup(self, sku_list):
        """
        Returns inventory data for the given list of skus.  This may imply
        multiple api calls to shipwire's api for each warehouse.  This can
        probably be threaded, but that is outside of the scope of the common
        api class.

        This function should return a dict where each key is a
        warehouse code, and the value is a list of Inventory object
        instances.

        Like so:
        { "warehouse" : [<Inventory>, ...] }
        """

        product_template = "<ProductCode>{0}</ProductCode>"
        req_template = """
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE InventoryUpdateResponse SYSTEM "http://www.shipwire.com/exec/download/InventoryUpdateResponse.dtd">
<InventoryUpdate>
    <Username>{0}</Username>
    <Password>{1}</Password>
    <Server>{2}</Server>
    <Warehouse>{3}</Warehouse>
    {4}
    <IncludeEmpty/>
</InventoryUpdate>
        """.strip()

        def gen_req(warehouse, sku_list):
            product_lines = "\n".join(
                [product_template.format(sku) for sku in sku_list])
            return req_template.format(
                self.__email, 
                self.__pass, 
                self.__server, 
                warehouse, 
                product_lines)
            
        requests = []
        for warehouse in WAREHOUSE_CODES:
            requests.append(gen_req(warehouse, sku_list))

        class ReqThread(Thread):
            """
            Performing the requests one after another is too slow.  This class
            wraps the post_and_fetch call so that several can be ran
            in parallel.
            """
            def __init__(self, api, request):
                self.data = ""
                self.req = request
                self.api = api
                Thread.__init__(self)
            def run(self):
                self.data = self.api.post_and_fetch(
                    self.req, "InventoryServices.php")

        start_time = time.time()
        pool = [ReqThread(self, req) for req in requests]
        for thread in pool:
            thread.start()
        for thread in pool:
            thread.join()
        total = time.time()-start_time
        responses = [thread.data for thread in pool]

        report = {}
        for warehouse, raw in zip(WAREHOUSE_CODES, responses):
            # note that lxml blows up when you pass it unicode
            fileob = StringIO(str(raw))
            root = etree.parse(fileob).xpath("/InventoryUpdateResponse")[0]
            items = []
            for entry in root.xpath("Product"):
                inv = Inventory()
                inv.code = entry.attrib["code"]
                inv.quantity = int(entry.attrib["quantity"])
                items.append(inv)
            report[warehouse] = items

        return report
Example #54
    def start_requests(self):
        requests = []

        for school in self.get_schools():
            # Construct an Item for each school and start with
            # requesting each school's overview pages.
            request = Request(school['schoolvo_detail_url'])

            item = {}
            for field, value in SCHOOLVO_FIELD_MAPPING.iteritems():
                schoolvo_value = school.get(value, None)
                # If there is a string value in the SchoolVO data, it is
                # probably entered by a human, so strip trailing/leading
                # whitespace
                if schoolvo_value\
                        and type(schoolvo_value) == unicode\
                        and field != 'phone':
                    schoolvo_value = schoolvo_value.strip()
                    try:
                        schoolvo_value = int(schoolvo_value)
                    except:
                        pass
                item[field] = schoolvo_value

            address = {
                'street': school.get('adres', None),
                'city': school.get('woonplaats', None),
                'zip_code': school.get('postcode', None),
            }

            for k, v in address.items():
                if v:
                    if k == 'zip_code':
                        v = v.replace(' ', '')
                    address[k] = v.strip()

            address['geo_location'] = {
                'lat': school.get('latitude', None),
                'lon': school.get('longitude', None)
            }

            item['address'] = address

            identifiers = item['schoolvo_code'].strip().split('-')
            item['board_id'] = int(identifiers[0])
            item['brin'] = identifiers[1]
            item['branch_id'] = int(identifiers[2])

            request.meta['item'] = SchoolVOItem(item)

            if school['pad_logo'] and school['pad_logo'].startswith('/'):
                request.meta['item']['logo_img_url'] = '%s%s'\
                    % (settings['SCHOOLVO_URL'], urllib.quote(\
                        school['pad_logo'][1:].encode('utf8')))

            if school['pad_gebouw'] and school['pad_gebouw'].startswith('/'):
                request.meta['item']['building_img_url'] = '%s%s'\
                    % (settings['SCHOOLVO_URL'], urllib.quote(\
                        school['pad_gebouw'][1:].encode('utf8')))

            requests.append(request)

        return requests
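Requests built without an explicit callback are dispatched to the spider's parse method, so the half-built SchoolVOItem rides along in request.meta and is completed there. A minimal sketch of the receiving side (the real callback is not shown in this snippet):

    def parse(self, response):
        # Recover the item attached in start_requests and fill in the
        # remaining fields from the school's overview page.
        item = response.meta['item']
        # ... extract the overview data here ...
        yield item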
Example #55
0
        r["follows"] = r["follows"]["count"]
        r["followed_by"] = r["followed_by"]["count"]
        r["media_count"] = r["media"]["count"]
        return r
    except:  # any missing key means the profile data is unusable
        return None

client = MongoClient()
all_users = client["project"]["all"]
instagram_users = client["project"]["instagram_data"]
requests = []

records = all_users.find({"filtered" : False}).limit(5000)

for record in records:
    requests.append(grequests.get("https://www.instagram.com/{}/".format(record["username"])))
    
print "================================"
print "Requests is ready, start mapping"
print "================================"

responses = grequests.map(requests)

for r in responses:
    try:
        print r
        if r.status_code == 200:
            info = user_info(r.text)
            if info:
                print "Adding - {}".format(info["username"])
                instagram_users.insert(info)
    except AttributeError:
        # grequests.map yields None for failed requests; skip them
        pass
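Mapping 5,000 requests at once is likely to trip Instagram's rate limits and drop connections. grequests.map takes a size argument that caps how many requests are in flight, and an exception_handler called for requests that raised instead of returning; a hedged sketch of the gentler call, with the pool size of 50 picked arbitrarily:

def on_error(request, exception):
    # grequests invokes this for any request that failed outright
    print "Failed: {} ({})".format(request.url, exception)

# At most 50 connections in flight at a time, instead of all 5000 at once
responses = grequests.map(requests, size=50, exception_handler=on_error)

Requests that fail still show up as None in responses, which the except AttributeError above skips.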
Example #56
0
    def __init__(self,dataset_code):
        """Parse codeGroupe page and stores parameters"""
        
        # Value of codeGroupe for this dataset
        self.dataset_code = dataset_code
        # Iterator counter
        self.iter = 0
        # Initialize the timestamp of the last update
        self.lastUpdate = datetime.datetime(1900,1,1)
        
        # Parse parameters
        url = "http://www.bdm.insee.fr/bdm2/choixCriteres?request_locale=en&codeGroupe=" + dataset_code
        fh = Insee.open_url_and_check(self,url)
        page = BeautifulSoup(fh,"lxml")
        h1 = page.find('h1')
        self.dataset_name = h1.string
        f = page.find('form',id='listeSeries')
        codes_nbr = collections.defaultdict(list)
        self.codes_desc = collections.defaultdict(list)
        multiselect = {}
        size = {}
        self.nbrCriterium = 0
        for field in f.find_all('fieldset'):
            self.nbrCriterium += 1
            legend = field.find('legend').string
            legend = re.match(r'(.*) (\(.*\))', legend).group(1)
            id = field.find('select')
            if id is None:
                for input in field.find_all('input'):
                    code = input['name']
                    size[code] = 1
                    codes_nbr[code].append(input['value'])
                    label = field.find('label')
                    self.codes_desc[legend].append([label.string,label.string])
                    size[code] += 1
            else:
                code = id['name']
                size[code] = 0
                for option in field.find_all('option'):
                    if "selected" in option:
                        codes_nbr[code] = option['value']
                        self.codes_desc[legend] = [option.string,option.string]
                        break
                    else:
                        codes_nbr[code].append(option['value'])
                        self.codes_desc[legend].append([option.string,option.string])
                    size[code] += 1
                multiselect[code] = field.find('input')['name']

        if dataset_code == '158':
            self.params = self.params_158(codes_nbr,multiselect)
            return
        
        # Establish a heuristic iteration strategy so as not to request more than 100 variables at a time
        # Implementation is limited to 4 criteria or fewer
        if self.nbrCriterium > 4:
            raise TooManyCriteriaError("Dataset_code: {} has {} Criteria.".format(self.dataset_code,self.nbrCriterium))
        # Storage for request parameters
        self.params = []
        # Total number of variables    
        total_size = 1
        for s in size.values():
            total_size *= s
        # Different strategies to build requests and construct params
        if total_size > 100:
            sstar = 0
            kstar = []
            for k in size.keys():
                s = total_size/size[k]
                if (s < 100) and (s > sstar):
                    # Search for smallest criterium that let us
                    # get chunks of less than 100 variables by
                    # combining the two other criteria
                    sstar = s
                    kstar = k
            if sstar > 0:
                # Run one iteration around optimal criterium
                for c1 in codes_nbr[kstar]:
                    requests = []
                    requests.append((kstar, c1))
                    if kstar in multiselect:
                        requests.append((multiselect[kstar],''))
                    for k in codes_nbr.keys():
                        if k != kstar:
                            requests += [(k,c2) for c2 in codes_nbr[k]]
                            if k in multiselect:
                                requests.append((multiselect[k],''))
                    # Append once per value of the optimal criterium, after
                    # all other criteria have been added in full
                    self.params.append(self.build_request_params(requests))
            else:
                # Run iterations on two criteria
                # Assumes that no criterium is larger than 100 
                # Criterium keys sorted by size of criterium
                ks = sorted(size,key=lambda k:size[k])
                # Number of times the largest criterium can be
                # read before reaching 100 variables
                n = round(100/size[ks[self.nbrCriterium-1]]-0.5)
                # Iterate over smallest criteria
                if self.nbrCriterium == 3:
                    for c1 in codes_nbr[ks[0]]:
                        requests = []
                        requests.append((ks[0], c1))
                        if ks[0] in multiselect:
                            requests.append((multiselect[ks[0]],''))
                        # Iterate over chunks of the intermediate-size criterium
                        n1 = 0
                        for c2 in codes_nbr[ks[1]][0:size[ks[1]]:n]:
                            requests1 = requests + [(ks[1],c3) for c3 in codes_nbr[ks[1]][n1:(n1+n)]]
                            if ks[1] in multiselect:
                                requests1.append((multiselect[ks[1]],''))
                            n1 += n
                            # Combine with largest criterium as a whole
                            requests1 += [(ks[2],c3) for c3 in codes_nbr[ks[2]]]
                            if ks[2] in multiselect:
                                requests1.append((multiselect[ks[2]],''))
                            self.params.append(self.build_request_params(requests1))
                elif self.nbrCriterium == 4:
                    for c1 in codes_nbr[ks[0]]:
                        requests = []
                        requests.append((ks[0], c1))
                        if ks[0] in multiselect:
                            requests.append((multiselect[ks[0]],''))
                        for c2 in codes_nbr[ks[1]]:
                            requests1 = requests + [(ks[1], c2)]
                            if ks[1] in multiselect:
                                requests1.append((multiselect[ks[1]],''))
                            # Iterate over chunks of the intermediate-size criterium
                            n1 = 0
                            for c3 in codes_nbr[ks[2]][0:size[ks[2]]:n]:
                                requests2 = requests1 + [(ks[2],c4) for c4 in codes_nbr[ks[2]][n1:(n1+n)]]
                                if ks[2] in multiselect:
                                    requests2.append((multiselect[ks[2]],''))
                                n1 += n
                                # Combine with largest criterium as a whole
                                requests2 += [(ks[3],c4) for c4 in codes_nbr[ks[3]]]
                                if ks[3] in multiselect:
                                    requests2.append((multiselect[ks[3]],''))
                                self.params.append(self.build_request_params(requests2))
        else:
            # one chunk is enough
            requests = []
            for k in codes_nbr.keys():
                requests += [(k,c) for c in codes_nbr[k]]
                if k in multiselect:
                    requests.append((multiselect[k],''))
            self.params.append(self.build_request_params(requests))
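The arithmetic behind the chunking heuristic is easy to check in isolation. A standalone sketch with hypothetical criterium sizes (none of these numbers come from INSEE), showing why each request stays under the 100-variable cap:

# Hypothetical criterium sizes (not from INSEE): 10 * 20 * 30 = 6000 variables
size = {'A': 10, 'B': 20, 'C': 30}

# Every complement exceeds 100 (e.g. 6000/30 = 200), so no single criterium
# can serve as the iteration axis; fall back to chunking two criteria.
ks = sorted(size, key=lambda k: size[k])        # ['A', 'B', 'C']

# Chunk size for the intermediate criterium: how many full copies of the
# largest criterium fit under the 100-variable cap.
n = round(100 / size[ks[-1]] - 0.5)             # floor(100/30) = 3

# Each request covers one value of A, n values of B, and all of C:
print(1 * n * size[ks[-1]])                     # 3 * 30 = 90 <= 100

# Total requests: 10 values of A * ceil(20/3) = 7 chunks of B = 70 requests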