def parse_adm_readall_ofs(p):
    if not p.haslayer('SAPMSAdmRecord'):
        print "Packet has no 'SAPMSAdmRecord'."
        exit(-1)
    print "[+] Text Storage"
    records = dict()
    for e in p.adm_records:
        name = e.rzl_strg_name
        value = str(e.rzl_strg_value)
        type_v = ms_adm_rzl_strg_type_values[e.rzl_strg_type]
        # encoding of value for logon group is binary (IP + port etc.)
        if value.startswith('LG_EYECAT'):
            value = parse_logon_group(value)
        records[name] = (type_v, value)
    # pretty print that
    for r in records.keys():
        if isinstance(records[r][1], list):
            print red(r, bold=True) + '\t: ' + ' '.join(records[r][1])
        elif records[r][0].endswith('_C'):
            print green(r) + '\t: ' + str(records[r][1])
        #else:
        #    print green(r) + '\t: ' + "[list of integers]"
    return records
def print_answer(p):
    fromname = p.fromname
    try:
        flag = ms_flag_values[p[SAPMS].flag]
    except:
        flag = "0"
    try:
        opcode = str(ms_opcode_values[p[SAPMS].opcode])
    except:
        opcode = str(p[SAPMS].opcode)
    try:
        opcode_err = str(ms_opcode_error_values[p[SAPMS].opcode_error])
    except:
        opcode_err = 'None'
    if opcode_err == 'MSOP_OK':
        opcode_err = green(opcode_err)
    else:
        opcode_err = red(opcode_err, bold=True)
    if p.key != null_key:
        mskey_parse_print(p.key)
        key = p.key.encode('hex')
    else:
        key = "NULL"
    logger.debug("flag: " + cyan(flag) + " opcode:" + cyan(opcode) + \
                 " opcode_error: " + green(opcode_err) + " key: %s" % key)
def parse_adm_readall_i(p):
    if not p.haslayer('SAPMSAdmRecord'):
        print "Packet has no 'SAPMSAdmRecord'."
        exit(-1)
    print "[+] Integer Storage"
    print
    records = dict()
    for e in p.adm_records:
        records[e.rzl_strg_name] = [e.rzl_strg_uptime,
                                    e.rzl_strg_integer1,
                                    e.rzl_strg_delay,
                                    e.rzl_strg_integer3,
                                    e.rzl_strg_users,
                                    e.rzl_strg_quality,
                                    e.rzl_strg_integer6,
                                    e.rzl_strg_integer7,
                                    e.rzl_strg_integer8,
                                    e.rzl_strg_integer9]
    # get back those 32 bits signed integers
    f = lambda x: x - 4294967296 if x > 0x7fffffff else x
    # pretty print that
    for r in records.keys():
        tmp_r = map(f, records[r])
        print green(r) + '\t: ' + '\t'.join([str(e) for e in tmp_r])
    return records
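# Standalone sketch (not part of the original tool): the lambda in
# parse_adm_readall_i reinterprets an unsigned 32-bit value as a signed
# two's-complement integer. A struct round-trip performs the same
# conversion and makes the intent explicit.
import struct

def to_int32(x):
    """Reinterpret an unsigned 32-bit integer as signed two's complement."""
    return struct.unpack('<i', struct.pack('<I', x))[0]

assert to_int32(0xffffffff) == -1
assert to_int32(0x7fffffff) == 2147483647
assert to_int32(0) == 0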
def parse(self, response):
    # Extract fields
    title = LAW.TITLE.xt(response)
    parl_id = LAW.PARL_ID.xt(response)
    status = LAW.STATUS.xt(response)

    LLP = LegislativePeriod.objects.get(
        roman_numeral=response.url.split('/')[-4])

    # Extract foreign keys
    category = LAW.CATEGORY.xt(response)
    description = LAW.DESCRIPTION.xt(response)

    # Create category if we don't have it yet
    cat, created = Category.objects.get_or_create(title=category)
    if created:
        log.msg(u"Created category {}".format(
            green(u'[{}]'.format(category))))

    # Create and save Law
    law_data = {
        'title': title,
        'status': status,
        'description': description
    }
    law_item, law_created = Law.objects.update_or_create(
        parl_id=parl_id,
        legislative_period=LLP,
        source_link=response.url,
        defaults=law_data)

    # Attach foreign keys
    law_item.keywords = self.parse_keywords(response)
    law_item.category = cat
    law_item.documents = self.parse_docs(response)

    law_item.save()

    # Log our progress
    if law_created:
        logtext = u"Created {} with id {}, LLP {} @ {}"
    else:
        logtext = u"Updated {} with id {}, LLP {} @ {}"
    logtext = logtext.format(
        red(title),
        cyan(u"[{}]".format(parl_id)),
        green(str(LLP)),
        blue(response.url)
    )
    log.msg(logtext, level=log.INFO)

    response.meta['law_item'] = law_item

    # Is the tab 'Parlamentarisches Verfahren' available?
    if response.xpath('//h2[@id="tab-ParlamentarischesVerfahren"]'):
        self.parse_parliament_steps(response)

    if response.xpath('//h2[@id="tab-VorparlamentarischesVerfahren"]'):
        self.parse_pre_parliament_steps(response)
def handle_answer(s, p):
    fromname = p.fromname
    try:
        flag = ms_flag_values[p[SAPMS].flag]
    except:
        flag = "0"
    try:
        opcode = str(ms_opcode_values[p[SAPMS].opcode])
    except:
        opcode = str(p[SAPMS].opcode)
    try:
        opcode_err = str(ms_opcode_error_values[p[SAPMS].opcode_error])
    except:
        opcode_err = 'None'
    if opcode_err == 'MSOP_OK':
        opcode_err = green(opcode_err)
    else:
        opcode_err = red(opcode_err, bold=True)
    if p.key != null_key:
        key = " key: " + yellow('NOT NULL', bold=True)
        logger.error("[!] Out of order packets, reload this script.")
        #s.close()
        #exit(0)
    else:
        key = ""
    logger.info("flag: " + cyan(flag) + " opcode:" + cyan(opcode) + \
                " opcode_error: " + green(opcode_err) + key)
def parse(self, response):
    all_link_followed = False
    try:
        all_links = response.xpath(
            '''//a[starts-with(text(),'Alle anzeigen')]/@href''')
        if len(all_links) > 0:
            URLOPTIONS = collections.OrderedDict(
                urlparse.parse_qsl(
                    urlparse.urlparse(all_links[0].extract()).query))
            URLOPTIONS['LISTE'] = ''
            URLOPTIONS['letter'] = ''
            new_url = '{}?{}'.format(self.BASE_URL, urlencode(URLOPTIONS))
            all_link_followed = True
            logger.debug(u"following show all link: {} -> {}".format(
                green(u'[{}]'.format(new_url)), response.url))
            yield response.follow(new_url, self.parse_list)
    except:
        opts = dict(urlparse.parse_qsl(response.url.split('?')[1]))
        if opts['GP'] not in ('KN', 'PN',):
            raise
    if not all_link_followed:
        urloptions = response.url.split('?')[1]
        opts = dict(urlparse.parse_qsl(urloptions))
        if not opts['GP'] in ('KN', 'PN',):
            logger.debug(u"no show all link, parsing list directly: {}".format(
                green(u'[{}]'.format(response.url))))
            for x in self.parse_list(response):
                yield x
def handle_answer(s, p):
    fromname = p.fromname
    try:
        flag = ms_flag_values[p[SAPMS].flag]
    except:
        flag = "0"
    try:
        opcode = str(ms_opcode_values[p[SAPMS].opcode])
    except:
        opcode = str(p[SAPMS].opcode)
    try:
        opcode_err = str(ms_opcode_error_values[p[SAPMS].opcode_error])
    except:
        opcode_err = 'None'
    if opcode_err == 'MSOP_OK':
        opcode_err = green(opcode_err)
    else:
        opcode_err = red(opcode_err, bold=True)
    if p.key != null_key:
        p.show()
        key = " key: " + yellow('NOT NULL', bold=True)
        print "[!] Out of order packets, reload this script."
        #s.close()
        #exit(0)
    else:
        key = ""
    print "flag: " + cyan(flag) + " opcode:" + cyan(opcode) + \
          " opcode_error: " + green(opcode_err) + key
    # identify request from the server?
    if key != "" and flag == 'MS_REQUEST' and opcode == '0':
        s.send(ms_adm_nilist(p, 1))
def parse_inquiry_response(self, response):
    """
    Callback function for parsing the inquiry responses
    """
    inquiry_item = response.meta['inquiry_item']
    source_link = response.url
    parl_id = response.url.split('/')[-2]
    title = INQUIRY.TITLE.xt(response)
    description = INQUIRY.RESPONSEDESCRIPTION.xt(response)
    LLP = inquiry_item.legislative_period
    category = INQUIRY.CATEGORY.xt(response)

    # Get or create Category object for the inquiry and log to screen if new
    # category is created.
    cat, created = Category.objects.get_or_create(title=category)
    if created:
        log.msg(u"Created category {}".format(
            green(u'[{}]'.format(category))))

    try:
        sender_object = Person.objects.get(
            parl_id=INQUIRY.RESPONSESENDER.xt(response))
    except:
        log.msg(
            red(u'Sender was not found in database, skipping Inquiry {} in LLP {}'
                .format(parl_id, LLP)))
        return

    # Create or update Inquiry item
    inquiryresponse_item, inquiryresponse_created = InquiryResponse.objects.update_or_create(
        parl_id=parl_id,
        legislative_period=LLP,
        defaults={
            'title': title,
            'source_link': source_link,
            'description': description,
            'sender': sender_object
        })

    # Attach foreign Keys
    inquiryresponse_item.documents = self.parse_response_docs(response)
    inquiryresponse_item.category = cat

    # Save InquiryResponse object
    inquiryresponse_item.save()

    if inquiryresponse_created:
        logtext = u"[{} of {}] Created InquiryResponse {} with ID {}, LLP {} @ {}"
    else:
        logtext = u"[{} of {}] Updated InquiryResponse {} with ID {}, LLP {} @ {}"
    logtext = logtext.format(
        self.SCRAPED_COUNTER,
        self.TOTAL_COUNTER,
        cyan(title),
        cyan(u"{}".format(parl_id)),
        green(str(LLP)),
        blue(response.url))
    log.msg(logtext, level=log.INFO)

    inquiry_item.response = inquiryresponse_item
    inquiry_item.save()

    return
def parse(self, response):
    persons = AUDITORS.LIST.xt(response)
    callback_requests = []

    self.logger.info("Scraping {} persons".format(len(persons)))

    # Iterate all persons
    for p in persons:
        # Extract basic data
        parl_id = p["source_link"].split("/")[-2]
        p["source_link"] = "{}{}".format(BASE_HOST, p["source_link"])

        # Create or update simple person's item
        person_data = {"reversed_name": p["reversed_name"]}
        person_item, created_person = Person.objects.update_or_create(
            source_link=p["source_link"],
            parl_id=parl_id,
            defaults=person_data)
        if created_person:
            self.logger.info(u"Created Person {}".format(
                green(u"[{}]".format(p["reversed_name"]))))
        else:
            self.logger.info(u"Updated Person {}".format(
                green(u"[{}]".format(p["reversed_name"]))))

        mandate = p["mandate"]

        function_item, f_created = Function.objects.get_or_create(
            short=mandate["short"], title=mandate["title"])
        if f_created:
            self.logger.info(u"Created function {}".format(
                green(u"[{}]".format(function_item.short))))

        # Create and append mandate
        try:
            mandate_item, m_created = Mandate.objects.update_or_create(
                function=function_item,
                start_date=mandate["start_date"],
                end_date=mandate["end_date"])
        except:
            self.logger.info(red("Error saving Mandate {} ({} - {})".format(
                function_item, mandate["start_date"], mandate["end_date"])))
            import ipdb
            ipdb.set_trace()
        person_item.mandates.add(mandate_item)
        person_item.save()

        # First time we encounter a person, we scan her detail page too
        if not parl_id in self.persons_scraped:
            # Create Detail Page request
            req = scrapy.Request(p["source_link"],
                                 callback=self.parse_person_detail)
            req.meta["person"] = {
                "reversed_name": p["reversed_name"],
                "source_link": p["source_link"],
                "parl_id": parl_id,
            }
            callback_requests.append(req)
            self.persons_scraped.append(parl_id)
    return callback_requests
def parse_signatures(self, response):
    """
    Parse the public signatures
    """
    petition = response.meta['petition_item']

    signatures = PETITION.SIGNATURES.xt(response)

    log.msg(u"Creating or updating {} signatures".format(
        green(u'{}'.format(len(signatures)))), level=log.INFO)

    # find latest saved signature date
    last_signature_date = datetime.date.fromtimestamp(0)
    try:
        last_signature_date = petition.petition_signatures.latest(
            'date').date
        self.logger.debug(u'Latest signature date saved: {}'.format(
            green(u'{}'.format(last_signature_date))))
    except:
        self.logger.warning(u'No latest signature date found')

    count_created = 0
    count_bulk_create = 0

    # signatures on the latest saved date
    signatures_ondate = [
        sig for sig in signatures if sig['date'] == last_signature_date
    ]
    for signature in signatures_ondate:
        petition_signature, created = PetitionSignature.objects.get_or_create(
            petition=petition, **signature)
        if created:
            count_created += 1

    signatures_afterdate = [
        sig for sig in signatures if sig['date'] > last_signature_date
    ]
    # remove duplicates as pre-processing step for bulk_create
    # code for de-duplication for list of dicts used from:
    # http://stackoverflow.com/a/6281063/331559
    signatures_afterdate = [
        dict(y) for y in set(
            tuple(x.items()) for x in signatures_afterdate)
    ]
    signature_items = []
    for signature in signatures_afterdate:
        signature_item = PetitionSignature(petition=petition, **signature)
        signature_items.append(signature_item)
        count_bulk_create += 1
    PetitionSignature.objects.bulk_create(signature_items)

    self.logger.debug(u"Created {} and bulk created {} signatures".format(
        green(u'{}'.format(count_created)),
        green(u'{}'.format(count_bulk_create))))
def parse_signatures(self, response):
    """
    Parse the public signatures
    """
    petition = response.meta['petition_item']

    signatures = PETITION.SIGNATURES.xt(response)

    log.msg(u"Creating or updating {} signatures".format(
        green(u'{}'.format(len(signatures)))))

    # find latest saved signature date
    last_signature_date = datetime.date.fromtimestamp(0)
    try:
        last_signature_date = petition.petition_signatures.latest(
            'date').date
        log.msg(u'Latest signature date saved: {}'.format(
            green(u'{}'.format(last_signature_date))))
    except:
        log.msg(u'No latest signature date found')

    count_created = 0
    count_bulk_create = 0

    # signatures on the latest saved date
    signatures_ondate = [
        sig for sig in signatures if sig['date'] == last_signature_date]
    for signature in signatures_ondate:
        petition_signature, created = PetitionSignature.objects.get_or_create(
            petition=petition, **signature)
        if created:
            count_created += 1

    signatures_afterdate = [
        sig for sig in signatures if sig['date'] > last_signature_date]
    # remove duplicates as pre-processing step for bulk_create
    # code for de-duplication for list of dicts used from:
    # http://stackoverflow.com/a/6281063/331559
    signatures_afterdate = [
        dict(y) for y in set(tuple(x.items()) for x in signatures_afterdate)]
    signature_items = []
    for signature in signatures_afterdate:
        signature_item = PetitionSignature(petition=petition, **signature)
        signature_items.append(signature_item)
        count_bulk_create += 1
    PetitionSignature.objects.bulk_create(signature_items)

    log.msg(u"Created {} and bulk created {} signatures".format(
        green(u'{}'.format(count_created)),
        green(u'{}'.format(count_bulk_create))))
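# Standalone sketch (not part of the original spider): the set-of-tuples
# idiom used above de-duplicates a list of dicts by making each dict
# hashable first. It assumes hashable values and does not preserve order;
# sorting the items additionally makes it robust to differing key order.
def dedupe_dicts(rows):
    return [dict(t) for t in set(tuple(sorted(d.items())) for d in rows)]

rows = [{'name': 'a', 'date': 1}, {'date': 1, 'name': 'a'}, {'name': 'b', 'date': 2}]
assert len(dedupe_dicts(rows)) == 2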
def checkFile(directory, expectedFile, projectName):
    "Check if a given expected file exists inside a directory"
    filename = projectName + expectedFile.extension
    filepath = os.path.join(directory, filename)
    if os.path.isfile(filepath):
        print green("Found %s data %s" % (expectedFile.format, filename))
        if expectedFile.checkFN is not None:
            expectedFile.checkFN(expectedFile, filepath)
    else:
        print red("File %s (%s) missing" % (filename, expectedFile.name), bold=True)
        return None
    return filename
def checkConnection(args):
    import YakDB
    # Check request/reply connection
    print(blue("Checking request/reply connection...", bold=True))
    conn = YakDB.Connection()
    conn.connect(args.req_endpoint)
    # Request server info
    print((conn.serverInfo()).decode("utf-8"))
    print(green("REQ/REP connection attempt successful"))
    # Check push/pull connection
    print(blue("Checking push/pull connection...", bold=True))
    conn = YakDB.Connection()
    conn.usePushMode()
    conn.connect(args.push_endpoint)
    print(green("PUSH/PULL connection attempt successful"))
def parse(self, response):
    llps = LLP.xt(response)
    for llp in llps:
        llp_item, created_llp = LegislativePeriod.objects.update_or_create(
            roman_numeral=llp['roman_numeral'],
            defaults=llp)
        llp_item.save()
        if created_llp:
            self.logger.info(u"Created Legislative Period {}".format(
                green(u'[{}]'.format(llp['roman_numeral']))))
        else:
            self.logger.info(u"Updated Legislative Period {}".format(
                green(u"[{}]".format(llp['roman_numeral']))))
def parse_steps(self, response):
    """
    Parse the Pre-Law's steps
    """
    law_item = response.meta['law_item']

    # Create phase if we don't have it yet
    phase_item, created = Phase.objects.get_or_create(title='default')
    if created:
        log.msg(u"Created Phase {}".format(
            green(u'[{}]'.format(phase_item.title))))

    steps = PRELAW.STEPS.xt(response)
    if steps:
        log.msg(u"Creating {} steps".format(
            cyan(u'[{}]'.format(len(steps)))))

    # Create steps
    for step in steps:
        step_item, created = Step.objects.update_or_create(
            title=step['title'],
            sortkey=step['sortkey'],
            date=step['date'],
            protocol_url=step['protocol_url'],
            law=law_item,
            phase=phase_item,
            source_link=response.url)
        step_item.save()
def iterateUniprotDatabases(quiet=True):
    """
    Fetch the uniprot metadatabase by guessing valid integral database IDs.
    Guarantees to yield all databases with IDs up to 299.
    """
    template = "http://www.uniprot.org/database/%d.rdf"
    for i in range(300):  # In the far future, there might be more DBs than 300.
        r = requests.get(template % i)
        if r.status_code == requests.codes.ok:
            if not quiet:
                print(green("[UniProt MetaDB] Fetching DB #%d" % i))
            soup = BeautifulSoup(r.text)
            # Very, very crude RDF/XML parser
            rdf = soup.html.body.find("rdf:rdf")
            db = {
                "id": rdf.abbreviation.text,
                "name": rdf.abbreviation.text,
                "category": rdf.category.text,
                "description": rdf.find("rdfs:label").text,
            }
            url = rdf.find("rdfs:seealso")["rdf:resource"]
            if url:
                db["url"] = url
            urltemplate = rdf.urltemplate.text
            if urltemplate:
                db["urltemplate"] = urltemplate
            yield db
        else:
            if not quiet:
                print(red("[UniProt MetaDB] Database #%d does not exist" % i))
def f_check():
    global timeout, base_url
    timeout = args.timeout
    logging.captureWarnings(True)  # Capture the ssl warnings with the standard logging module
    if args.ssl:
        base_url = "https://{}:{}/{}".format(args.host, args.port, args.url)
    else:
        base_url = "http://{}:{}/{}".format(args.host, args.port, args.url)
    f_verbose("[*] Program will check WebLogic for the CVE-2017-3506 & CVE-2017-10271 vulnerabilities.")
    if f_run():
        print red("[x]") + " Your system is potentially vulnerable to XML Serialization attack!"
    else:
        print green("[*]") + " Your system is " + blue("safe!")
def parse_debatelist(self, response):
    """
    Parse list of debates
    """
    llp = response.meta['llp'] if 'llp' in response.meta else None
    debate_type = response.meta['type'] \
        if 'type' in response.meta else ''
    debates = RSS_DEBATES.xt(response)
    self.logger.info(
        green(u"{} debates from {}".format(len(debates), response.url)))

    # If SNR is set, use only a subset of debates for further parsing
    fetch_debates = filter(
        lambda r: r['protocol_url'] != "" and
        (not self.SNR or self.SNR in r['title']), debates)

    for debate in fetch_debates:
        debate['llp'] = llp
        debate['debate_type'] = debate_type
        debate['protocol_url'] = BASE_HOST + debate['protocol_url']
        debate_item = self.store_debate(debate)
        yield scrapy.Request(debate['protocol_url'],
                             callback=self.parse_debate,
                             meta={'debate': debate_item})
def parse_debate(self, response):
    """
    Debate-transcript ("Stenografisches Protokoll") parser
    """
    count = 0
    for i, sect in enumerate(DOCSECTIONS.xt(response)):
        # Lookup + add references to the section data
        sect['debate'] = response.meta['debate']
        if 'speaker_id' in sect and sect['speaker_id'] is not None:
            try:
                sect['person'] = Person.objects.get(
                    parl_id=sect['speaker_id'])
            except Person.DoesNotExist:
                self.logger.warning(
                    red(u"Person '{}' not found".format(
                        sect['speaker_id'])))
        else:
            sect['person'] = None

        # Select best timestamps for start and end and make datetime
        start_ts = sect['time_start'] or sect['ref_timestamp']
        end_ts = sect['time_end'] or sect['ref_timestamp']
        sect['date'] = self._apply_ts(sect['debate'].date, start_ts)
        sect['date_end'] = self._apply_ts(sect['debate'].date, end_ts)

        self.store_statement(sect, i)
        count += 1

    self.logger.info(
        green(u"Saved {} sections from {}".format(count, response.url)))
def parse_debate(self, response):
    """
    Debate-transcript ("Stenografisches Protokoll") parser
    """
    count = 0
    for i, sect in enumerate(DOCSECTIONS.xt(response)):
        # Lookup + add references to the section data
        sect['debate'] = response.meta['debate']
        if 'speaker_id' in sect and sect['speaker_id'] is not None:
            try:
                sect['person'] = Person.objects.get(
                    parl_id=sect['speaker_id'])
            except Person.DoesNotExist:
                self.logger.warning(
                    red(u"Person '{}' not found".format(sect['speaker_id'])))
        if sect['ref_timestamp'] is not None \
                and len(sect['ref_timestamp']) == 2:
            sect['date'] = sect['debate'].date.replace(
                minute=sect['ref_timestamp'][0],
                second=sect['ref_timestamp'][1])
        self.store_statement(sect, i)
        count += 1

    self.logger.info(
        green(u"Saved {} sections from {}".format(count, response.url)))
def inform(msg, minor=False, major=False):
    if major:
        ansicolor.write_out(ansicolor.yellow('>>> %s\n' % msg))
    elif minor:
        ansicolor.write_out(ansicolor.cyan('-> %s\n' % msg))
    else:
        ansicolor.write_out(ansicolor.green('> %s\n' % msg))
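# Usage sketch for inform() above (standalone, illustrative only):
#   inform("building index", major=True)      # yellow ">>> building index"
#   inform("12 records parsed")               # green  "> 12 records parsed"
#   inform("skipping duplicate", minor=True)  # cyan   "-> skipping duplicate"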
def export_lang_to_db(lang, filt):
    count = 0
    for file in findXLIFFFiles("cache/{}".format(lang), filt=filt):
        # e.g. '1_high_priority_platform/about.donate.xliff'
        canonicalFilename = "/".join(file.split("/")[2:])
        section = canonicalFilename.partition("/")[0]
        # Don't index drafts
        if "learn.draft.xliff" in canonicalFilename:
            print(green("Skipping {}".format(canonicalFilename), bold=True))
            continue
        # relevant_for_live
        relevant_for_live = False
        if canonicalFilename in relevant_for_live_files:
            relevant_for_live = True
        print(black(file, bold=True))
        soup = parse_xliff_file(file)
        for entry in process_xliff_soup(soup, also_approved=True):
            obj = {
                "id": int(entry.ID),
                "source": entry.Source,
                "target": entry.Translated,
                "source_length": len(entry.Source),
                "is_translated": entry.IsTranslated,
                "is_approved": entry.IsApproved,
                "translation_source": "Crowdin",
                "file": canonicalFilename,
                "fileid": entry.FileID,
                "relevant_for_live": relevant_for_live
            }
            # Async write
            executor.submit(write_entry, obj, lang)
            # Stats
            count += 1
            if count % 1000 == 0:
                print("Processed {} records".format(count))
def execute_scenario_by_steps(ctx: Context, scenario: Scenario) -> None:
    """Step executor for setup and teardown tagged scenarios

    Args:
        ctx: The behave context
        scenario: The behave scenario object
    """
    # Set an empty list of steps to run
    parsed_steps = []

    # For each step put the step in the parsed list
    for step in scenario.steps:
        parsed_steps.append(f"{step.keyword} {step.name}")
        # check to see if we have a table with our step. If we do make sure we put the headings
        # and rows into the parsed steps list so we execute the full step
        if step.table:
            heading_string = ""
            for heading in step.table.headings:
                heading_string += f"{heading}|"
            parsed_steps.append(f"|{heading_string}")
            for row in step.table.rows:
                row_string = "|".join(row.cells)
                parsed_steps.append(f"|{row_string}|")

    steps_string = "\n".join(parsed_steps)
    for step in parsed_steps:
        print(ansicolor.green(f" {step}"))  # noqa
    print("\n")  # noqa
    ctx.execute_steps(steps_string)
def performPOTDownload(lang, argtuple):
    """
    Explicitly uncurried function that downloads a single Crowdin file
    to a filesystem file.

    fileid, filepath
    """
    # Extract argument tuple
    fileid, filepath = argtuple
    urlPrefix = "http://crowdin.khanacademy.org/project/khanacademy/{0}/{1}/".format(
        lang, fileid)
    # Initialize session
    s = getCrowdinSession()
    # Trigger export
    exportResponse = s.get(urlPrefix + "export",
                           headers={"Accept": "application/json"})
    #print(exportResponse.text)
    if exportResponse.json()["success"] != True:
        raise Exception("Crowdin export failed: " + exportResponse.text)
    # Trigger download
    # Store in file
    with open(filepath, "w+b") as outfile:
        response = s.get(urlPrefix + "download", stream=True)
        if not response.ok:
            raise Exception("Download error")
        for block in response.iter_content(1024):
            outfile.write(block)
    print(green("Downloaded %s" % filepath))
def performPOTDownload(lang, argtuple):
    """
    Explicitly uncurried function that downloads a single Crowdin file
    to a filesystem file.

    fileid, filepath
    """
    # Extract argument tuple
    fileid, filepath = argtuple
    urlPrefix = "https://crowdin.com/project/khanacademy/{0}/{1}/".format(lang, fileid)
    # Initialize session
    s = getCrowdinSession()
    # Trigger export
    exportResponse = s.get(urlPrefix + "export",
                           headers={"Accept": "application/json"})
    #print(exportResponse.text)
    if exportResponse.json()["success"] != True:
        raise Exception("Crowdin export failed: " + exportResponse.text)
    # Trigger download
    # Store in file
    with open(filepath, "w+b") as outfile:
        response = s.get(urlPrefix + "download", stream=True)
        if not response.ok:
            raise Exception("Download error")
        for block in response.iter_content(1024):
            outfile.write(block)
    print(green("Downloaded %s" % filepath))
def parse_op_steps(self, response):
    """
    Parse the Opinion's steps
    """
    opinion = response.meta['opinion']

    # Create phase if we don't have it yet
    phase_item, created = Phase.objects.get_or_create(
        title='default_op')
    if created:
        log.msg(u"Created Phase {}".format(
            green(u'[{}]'.format(phase_item.title))))

    steps = OPINION.STEPS.xt(response)

    # Create steps
    for step in steps:
        step_item, created = Step.objects.update_or_create(
            title=step['title'],
            sortkey=step['sortkey'],
            date=step['date'],
            protocol_url=step['protocol_url'],
            opinion=opinion,
            phase=phase_item,
            source_link=response.url
        )
        step_item.save()
    return len(steps)
def parse_debatelist(self, response):
    """
    Parse list of debates
    """
    llp = response.meta['llp'] if 'llp' in response.meta else None
    debate_type = response.meta['type'] \
        if 'type' in response.meta else ''
    debates = RSS_DEBATES.xt(response)
    self.logger.info(green(u"{} debates from {}".format(len(debates),
                                                        response.url)))

    # If SNR is set, use only a subset of debates for further parsing
    fetch_debates = filter(lambda r: r['protocol_url'] != "" and
                           (not self.SNR or self.SNR in r['title']),
                           debates)

    for debate in fetch_debates:
        debate['llp'] = llp
        debate['debate_type'] = debate_type
        debate['protocol_url'] = BASE_HOST + debate['protocol_url']
        debate_item = self.store_debate(debate)
        yield scrapy.Request(debate['protocol_url'],
                             callback=self.parse_debate,
                             meta={'debate': debate_item})
def parse_debate(self, response):
    """
    Debate-transcript ("Stenografisches Protokoll") parser
    """
    count = 0
    for i, sect in enumerate(DOCSECTIONS.xt(response)):
        # Lookup + add references to the section data
        sect['debate'] = response.meta['debate']
        if 'speaker_id' in sect and sect['speaker_id'] is not None:
            try:
                sect['person'] = Person.objects.get(
                    parl_id=sect['speaker_id'])
            except Person.DoesNotExist:
                self.logger.warning(
                    red(u"Person '{}' not found".format(sect['speaker_id'])))
        else:
            sect['person'] = None

        # Select best timestamps for start and end and make datetime
        start_ts = sect['time_start'] or sect['ref_timestamp']
        end_ts = sect['time_end'] or sect['ref_timestamp']
        sect['date'] = self._apply_ts(sect['debate'].date, start_ts)
        sect['date_end'] = self._apply_ts(sect['debate'].date, end_ts)

        self.store_statement(sect, i)
        count += 1

    self.logger.info(
        green(u"Saved {} sections from {}".format(count, response.url)))
def parse_inquiry_response(self, response):
    """
    Callback function for parsing the inquiry responses
    """
    # allow testing single urls for parsing errors
    inquiry_item = response.meta.get('inquiry_item', None)
    source_link = response.url
    parl_id = response.url.split('/')[-2]
    title = INQUIRY.TITLE.xt(response)
    description = INQUIRY.RESPONSEDESCRIPTION.xt(response)
    LLP = inquiry_item.legislative_period if inquiry_item else None
    category = INQUIRY.CATEGORY.xt(response)

    # Get or create Category object for the inquiry and log to screen if new
    # category is created.
    cat, created = Category.objects.get_or_create(title=category)
    if created:
        log.msg(u"Created category {}".format(
            green(u'[{}]'.format(category))))

    try:
        sender_object = Person.objects.get(
            parl_id=INQUIRY.RESPONSESENDER.xt(response))
    except Exception, e:
        log.msg(red(u'Sender "{}" was not found in database, skipping Inquiry {} in LLP {}'.format(
            INQUIRY.RESPONSESENDER.xt(response), parl_id, LLP)))
        return
def parse_steps(self, response):
    """
    Callback function to parse the single-page history for normal inquiries
    """
    inquiry_item = response.meta['inquiry_item']
    # Get or create a default phase for inquiries, because there are no phases in
    # simple inquiries.
    phase_item, created = Phase.objects.get_or_create(
        title='default_inqu')
    if created:
        log.msg(u"Created Phase {}".format(
            green(u'[{}]'.format(phase_item.title))))
    steps = INQUIRY.STEPS.xt(response)
    if steps and "Schriftliche Beantwortung" in steps[-1]["title"]:
        response_link = INQUIRY.RESPONSE_LINK.xt(response)
    else:
        response_link = 0
    for step in steps:
        step_item, created = Step.objects.update_or_create(
            title=step['title'],
            sortkey=step['sortkey'],
            date=step['date'],
            protocol_url=step['protocol_url'],
            inquiry=inquiry_item,
            phase=phase_item,
            source_link=response.url
        )
        step_item.save()
    return response_link
def diag_grab_password(packet):
    if not packet.haslayer(SAPMS):
        return
    p = Packet()
    atoms = None
    try:
        p = SAPDiag(str(packet[SAPMS]))
        atoms = p[SAPDiag].get_item(["APPL", "APPL4"], "DYNT", "DYNT_ATOM")
    except:
        pass
    # Print the Atom items information
    if atoms:
        logger.info("[*] Input fields:")
        current_user = None
        current_pass = None
        for atom in [atom for atom_item in atoms
                     for atom in atom_item.item_value.items]:
            if atom.etype in [121, 122, 123, 130, 131, 132]:
                text = atom.field1_text or atom.field2_text
                text = text.strip()
                if not text:
                    continue
                if atom.attr_DIAG_BSD_INVISIBLE and len(text) > 0:
                    logger.info("\tPassword field:\t%s" % green(text, bold=True))
                    current_pass = text
                else:
                    logger.info("\tRegular field:\t%s" % (text))
                    current_user = text
        if current_user and current_pass:
            print "$ rfc_exec.py --host %s -S %s -C XXX -U '%s' -P '%s' -c info" % (
                attacked_as['ip'], '00', current_user, current_pass)
def parse_op_steps(self, response):
    """
    Parse the Opinion's steps
    """
    opinion = response.meta['opinion']
    # Create phase if we don't have it yet
    phase_item, created = Phase.objects.get_or_create(title='default_op')
    if created:
        log.msg(u"Created Phase {}".format(
            green(u'[{}]'.format(phase_item.title))))
    steps = OPINION.STEPS.xt(response)
    # Create steps
    for step in steps:
        step_item, created = Step.objects.update_or_create(
            title=step['title'],
            sortkey=step['sortkey'],
            date=step['date'],
            protocol_url=step['protocol_url'],
            opinion=opinion,
            phase=phase_item,
            source_link=response.url)
        step_item.save()
    return len(steps)
def restore(cls, url):
    hostname = urlrewrite.get_hostname(url)
    filename = urlrewrite.hostname_to_filename(hostname)
    q, wb = None, None
    if ioutils.file_exists(filename + ".web", dir=ioutils.LOGDIR):
        ioutils.write_err("Restoring web from %s ..." %
                          ansicolor.yellow(filename + ".web"))
        wb = ioutils.deserialize(filename + ".web", dir=ioutils.LOGDIR)
        ioutils.write_err(ansicolor.green("done\n"))
    if ioutils.file_exists(filename + ".session", dir=ioutils.LOGDIR):
        ioutils.write_err("Restoring session from %s ..." %
                          ansicolor.yellow(filename + ".session"))
        q = ioutils.deserialize(filename + ".session", dir=ioutils.LOGDIR)
        q = recipe.overrule_records(q)
        ioutils.write_err(ansicolor.green("done\n"))
    return cls(wb=wb, queue=q)
def parse_steps(self, response):
    """
    Callback function to parse the single-page history for normal inquiries
    """
    response_link = []
    inquiry_item = response.meta['inquiry_item']
    # Get or create a default phase for inquiries, because there are no phases in
    # simple inquiries.
    phase_item, created = Phase.objects.get_or_create(title='default_inqu')
    if created:
        log.msg(u"Created Phase {}".format(
            green(u'[{}]'.format(phase_item.title))))
    steps = INQUIRY.STEPS.xt(response)
    for step in steps:
        if "Schriftliche Beantwortung" in step["title"]:
            response_link = INQUIRY.RESPONSE_LINK.xt(response)
    for step in steps:
        step_item, created = Step.objects.update_or_create(
            title=step['title'],
            sortkey=step['sortkey'],
            date=step['date'],
            protocol_url=step['protocol_url'],
            law=inquiry_item,
            phase=phase_item,
            source_link=response.url)
        step_item.save()
    if response_link:
        return response_link
    else:
        return
def parse_inquiry_response(self, response):
    """
    Callback function for parsing the inquiry responses
    """
    # allow testing single urls for parsing errors
    inquiry_item = response.meta.get('inquiry_item', None)
    source_link = response.url
    parl_id = response.url.split('/')[-2]
    title = INQUIRY.TITLE.xt(response)
    description = INQUIRY.RESPONSEDESCRIPTION.xt(response)
    LLP = inquiry_item.legislative_period if inquiry_item else None
    category = INQUIRY.CATEGORY.xt(response)

    # Get or create Category object for the inquiry and log to screen if new
    # category is created.
    cat, created = Category.objects.get_or_create(title=category)
    if created:
        log.msg(u"Created category {}".format(
            green(u'[{}]'.format(category))), level=log.DEBUG)

    try:
        sender_object = Person.objects.get(
            parl_id=INQUIRY.RESPONSESENDER.xt(response))
    except Exception, e:
        log.warning(red(u'Sender "{}" was not found in database, skipping Inquiry {} in LLP {}'.format(
            INQUIRY.RESPONSESENDER.xt(response), parl_id, LLP)))
        return
def parse_steps(self, response):
    """
    Parse the Pre-Law's steps
    """
    law_item = response.meta['law_item']

    # Create phase if we don't have it yet
    phase_item, created = Phase.objects.get_or_create(
        title='default')
    if created:
        log.msg(u"Created Phase {}".format(
            green(u'[{}]'.format(phase_item.title))))

    steps = PRELAW.STEPS.xt(response)
    if steps:
        log.msg(u"Creating {} steps".format(
            cyan(u'[{}]'.format(len(steps)))))

    # Create steps
    for step in steps:
        step_item, created = Step.objects.update_or_create(
            title=step['title'],
            sortkey=step['sortkey'],
            date=step['date'],
            protocol_url=step['protocol_url'],
            law=law_item,
            phase=phase_item,
            source_link=response.url
        )
        step_item.save()
def parse_opinion(self, response):
    """
    Parse one pre-law opinion
    """
    op_data = response.meta['op_data']
    parl_id = LAW.PARL_ID.xt(response)
    description = LAW.DESCRIPTION.xt(response)
    docs = self.parse_docs(response)
    category = self.parse_category(response)
    keywords = self.parse_keywords(response)

    entity = OPINION.ENTITY.xt(response)
    entity['title'] = op_data['title'] or entity['title_detail']
    entity['email'] = entity['email'] or op_data['email']

    entity_item, created = Entity.objects.get_or_create(
        title=entity['title'],
        title_detail=entity['title_detail'])
    if entity['phone'] and not entity_item.phone:
        entity_item.phone = entity['phone']
    if entity['email'] and not entity_item.email:
        entity_item.email = entity['email']
    # persist contact details picked up above
    entity_item.save()

    opinion_item, created = Opinion.objects.get_or_create(
        parl_id=parl_id,
        defaults={
            'date': op_data['date'],
            'description': description,
            'source_link': response.url,
            'entity': entity_item,
            'prelaw': response.meta['law_item'],
            'category': category
        })

    # Foreign Keys
    opinion_item.documents = docs
    opinion_item.keywords = keywords

    response.meta['opinion'] = opinion_item
    step_num = self.parse_op_steps(response)

    entity_str = u"{} / {} / {} [{}]".format(
        green(entity_item.title_detail),
        entity_item.phone,
        entity_item.email,
        'new' if created else 'updated')
    log.msg(u"Opinion: {} by {}".format(
        magenta(opinion_item.parl_id),
        entity_str))
def parse(self, response):
    llps = LLP.xt(response)
    for llp in llps:
        llp_item, created_llp = LegislativePeriod.objects.update_or_create(
            roman_numeral=llp['roman_numeral'],
            defaults=llp
        )
        llp_item.save()
        if created_llp:
            self.logger.info(u"Created Legislative Period {}".format(
                green(u'[{}]'.format(llp['roman_numeral']))))
        else:
            self.logger.info(u"Updated Legislative Period {}".format(
                green(u"[{}]".format(llp['roman_numeral']))))
def write_progress(self, rate=None, prestart=None, wait=None,
                   complete=False, error=None):
    # compute string lengths
    action = self.action.rjust(self.actionwidth)

    if error:
        rate = error
    elif prestart:
        rate = "starting"
    elif wait:
        rate = ("%s" % self.retry_wait) + "s..."
    elif complete:
        rate = "done"
    else:
        rate = "%s/s" % self.format_size(rate)
    rate = rate.ljust(self.ratewidth)

    url = self.url_fmt

    if self.totalsize:
        size = self.format_size(self.totalsize)
    elif self.download_size:
        size = self.format_size(self.download_size)
    else:
        size = "????? B"
    size = (" %s" % size).ljust(self.sizewidth)

    # add formatting
    if error:
        rate = ansicolor.red(rate)
    elif prestart or wait:
        rate = ansicolor.cyan(rate)
    elif complete:
        rate = ansicolor.green(rate)
    else:
        rate = ansicolor.yellow(rate)

    # draw progress bar
    if not (error or prestart or complete) and self.totalsize:
        c = int(self.urlwidth * self.download_size / self.totalsize)
        url = ansicolor.wrap_string(self.url_fmt, c, None, reverse=True)

    if not self.totalsize:
        size = ansicolor.yellow(size)

    line = "%s :: %s " % (action, rate)
    term = (os.environ.get("DEBUG_FETCH") and "\n") or "\r"
    if error or complete:
        term = "\n"
    ioutils.write_err("%s%s%s%s" % (line, url, size, term))

    # log download
    if error:
        self.log_url(error, error=True)
    elif complete:
        self.log_url("done")
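# Standalone note on the bar arithmetic above (not part of the original
# module): the wrap_string call reverse-videos the first c characters of
# the URL, where c is the completed fraction of the visible width, e.g.
#   c = int(40 * 512 / 2048)   # == 10, i.e. 25% done on a 40-column bar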
def get_state_item(self, state):
    # Do we have this state already?
    state_item, created = State.objects.update_or_create(
        name=state["short"], title=state["long"])
    if created:
        state_item.save()
        self.logger.info(u"Created state {}: '{}'".format(
            green(u"[{}]".format(state_item.name)), state_item.title))
    return state_item
def parse_category(self, response):
    category = LAW.CATEGORY.xt(response)
    # Create category if we don't have it yet
    cat, created = Category.objects.get_or_create(title=category)
    if created:
        log.msg(u"Created category {}".format(
            green(u'[{}]'.format(category))))
    return cat
def updateTranslations(args):
    if args.all_languages:
        for language in findAvailableLanguages():
            print(green("Downloading language {0}".format(language), bold=True))
            args.language = language
            updateTranslation(args)
            # Cleanup objects (especially the pool) left from last language
            gc.collect()
    else:  # Single language
        updateTranslation(args)
def updateTranslations(args):
    if args.all_languages:
        for language in findAvailableLanguages():
            print(green("Downloading language {}".format(language), bold=True))
            args.language = language
            updateTranslation(args)
            # Cleanup objects (especially the pool) left from last language
            gc.collect()
    else:  # Single language
        updateTranslation(args)
def parse_adm_readall_ofs(p):
    if not p.haslayer('SAPMSAdmRecord'):
        print "Packet has no 'SAPMSAdmRecord'."
        exit(-1)
    logger.info("[+] Dumping Text Storage")
    records = dict()
    for e in p.adm_records:
        name = e.rzl_strg_name
        value = str(e.rzl_strg_value)
        type_v = ms_adm_rzl_strg_type_values[e.rzl_strg_type]
        records[name] = (type_v, value)
    # pretty print that
    for r in records.keys():
        if records[r][1].startswith('LG_EYECAT'):
            print red(r, bold=True) + '\t: ' + ' '.join(
                parse_logon_group(records[r][1]))
        elif records[r][0].endswith('_C'):
            print green(r) + '\t: ' + str(records[r][1])
    return records
def __init__(self, lang="de"): self.lang = lang if "GOOGLE_APPLICATION_CREDENTIALS" in os.environ: self.mode = "google-api" self.client = translate.Client(target_language=lang) self.translate = self._googleapi_translate print(green("Using google cloud translation API")) else: self.mode = "googletrans" self.translate = self._googletrans_translate print(red("Using googletrans"))
def cli():
    search = subprocess.check_output(
        "ag -i -C 2 '// ?todo' | sed 's#\([^/]\)[^/: ]*/#\\1/#g'",
        shell=True)
    search_lines = search.split("\n")
    search_lines.pop()
    print("")
    if not len(search_lines):
        print(green("✓ No TODOs found!"))
        sys.exit(0)
    print(yellow("⚠ Be aware of these TODOs:"))
    todo_line_pattern = re.compile(".*// ?todo.*", re.IGNORECASE)
    for line in search_lines:
        if re.match(todo_line_pattern, line):
            print(green(line))
        else:
            print(line)
    print("")
def save(self):
    hostname = urlrewrite.get_hostname(self.wb.root.url)
    filename = urlrewrite.hostname_to_filename(hostname)
    ioutils.write_err("Saving session to %s ..." %
                      ansicolor.yellow(filename + ".{web,session}"))
    ioutils.serialize(self.wb, filename + ".web", dir=ioutils.LOGDIR)
    if self.queue:
        ioutils.serialize(self.queue, filename + ".session", dir=ioutils.LOGDIR)
    # only web being saved, ie. spidering complete, remove old session
    elif ioutils.file_exists(filename + ".session", dir=ioutils.LOGDIR):
        ioutils.delete(filename + ".session", dir=ioutils.LOGDIR)
    ioutils.write_err(ansicolor.green("done\n"))
def get_state_item(self, state):
    # Do we have this state already?
    state_item, created = State.objects.update_or_create(
        name=state['short'], title=state['long'])
    if created:
        state_item.save()
        self.logger.info(u"Created state {}: '{}'".format(
            green(u'[{}]'.format(state_item.name)), state_item.title))
    return state_item
def store_debate(self, data):
    """
    Save (update or insert) debate to ORM
    """
    try:
        debate = Debate.objects.get(llp=data['llp'], nr=data['nr'])
    except Debate.DoesNotExist:
        debate = Debate()
    for (key, value) in data.items():
        setattr(debate, key, value)
    debate.save()
    self.logger.info(green(u"Debate metadata saved {}".format(debate)))
    return debate
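# Standalone note (an assumption, not the project's code): the manual
# get/DoesNotExist/setattr pattern in store_debate is roughly what Django's
# update_or_create helper does, keyed on the debate's natural key:
#
#   debate, created = Debate.objects.update_or_create(
#       llp=data['llp'], nr=data['nr'],
#       defaults={k: v for k, v in data.items() if k not in ('llp', 'nr')})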
def parse_keywords(self, response):
    keywords = LAW.KEYWORDS.xt(response)
    # Create all keywords we don't yet have in the DB
    keyword_items = []
    for keyword in keywords:
        kw, created = Keyword.objects.get_or_create(title=keyword)
        if created:
            log.msg(u"Created keyword {}".format(
                green(u"[{}]".format(keyword))))
        keyword_items.append(kw)
    return keyword_items
def get_party_item(self, mandate):
    # Do we have this party already?
    party_item, created = Party.objects.update_or_create(
        short=mandate["short"])
    titles = party_item.titles
    if mandate["title"] not in titles:
        titles.append(mandate["title"])
    party_item.titles = titles
    party_item.save()
    if created:
        self.logger.info(u"Created party {}".format(
            green(u"[{}]".format(party_item.short))))
    return party_item
def get_administration_item(self, mandate):
    # Do we have this administration already?
    admin_data = {
        'start_date': mandate['administration']['start_date'],
        'end_date': mandate['administration']['end_date']
    }
    admin_item, created = Administration.objects.update_or_create(
        title=mandate['administration']['title'][0],
        defaults=admin_data)
    if created:
        admin_item.save()
        self.logger.info(u"Created administration {}".format(
            green(u'[{}]'.format(admin_item.title))))
    return admin_item
def get_party_item(self, mandate):
    # Do we have this party already?
    party_item, created = Party.objects.update_or_create(
        short=mandate['short'])
    titles = party_item.titles
    if not titles:
        titles = []
    if mandate['title'] not in titles:
        titles.append(mandate['title'])
    party_item.titles = titles
    party_item.save()
    if created:
        self.logger.info(u"Created party {}".format(
            green(u'[{}]: {}'.format(party_item.short, party_item.titles))))
    return party_item
def importMeSH(args, infile):
    db = DocumentDB.YakDBDocumentDatabase(mode="PUSH")
    # NOTE: MeSH 2015 contains only 27k entities
    batch = db.entityIdx.newWriteBatch(chunkSize=40000)
    print(green("Starting to import entities from %s" % infile))
    # Read file
    with open(infile, "r") as infile:
        writeStartTime = time.time()
        for mesh in readMeSH(infile):
            # Write entity to database
            batch.writeEntity(meshEntryToEntity(mesh))
            # Statistics
            if batch.numWrites % 5000 == 0:
                deltaT = time.time() - writeStartTime
                entityWriteRate = batch.numWrites / deltaT
                print("Wrote %d entities at %.1f e/s" % (batch.numWrites, entityWriteRate))
    print("Wrote overall %d entities" % batch.numWrites)
def parse_person_detail(self, response):
    """
    Parse a person's detail page before creating the person object
    """
    person = response.meta['person']
    self.logger.info(u"Updating Person Detail {}".format(
        green(u"[{}]".format(person['reversed_name']))))

    full_name = PERSON.DETAIL.FULL_NAME.xt(response)
    bio_data = PERSON.DETAIL.BIO.xt(response)
    profile_photo_url = PERSON.DETAIL.PHOTO_URL.xt(response)
    profile_photo_copyright = PERSON.DETAIL.PHOTO_COPYRIGHT.xt(response)
    try:
        person_data = {
            'photo_link': "{}{}".format(BASE_HOST, profile_photo_url),
            'photo_copyright': profile_photo_copyright,
            'full_name': full_name,
            'reversed_name': person['reversed_name'],
            'birthdate': bio_data['birthdate'],
            'birthplace': bio_data['birthplace'],
            'deathdate': bio_data['deathdate'],
            'deathplace': bio_data['deathplace'],
            'occupation': bio_data['occupation']}
        person_item, created_person = Person.objects.update_or_create(
            source_link=person['source_link'],
            parl_id=person['parl_id'],
            defaults=person_data
        )
        person_item.save()
        # Instantiate slug
        person_item.slug
    except:
        self.logger.info(red("Error saving Person {}".format(full_name)))
        import ipdb
        ipdb.set_trace()
    return
def importUniprot(args, infile):
    db = DocumentDB.YakDBDocumentDatabase(mode="PUSH")
    batch = db.entityIdx.newWriteBatch(chunkSize=25000)
    print(green("Starting to import entities from %s" % infile))
    # Read uniprot file, zcat is about 5-10 times faster and
    # distributes load over multiple cores.
    p = subprocess.Popen(["zcat", infile], stdout=subprocess.PIPE)
    writeStartTime = time.time()
    for uniprot in readUniprot(p.stdout):
        # Write entity to database
        batch.writeEntity(uniprotEntryToEntity(uniprot))
        # Statistics
        if batch.numWrites % 10000 == 0:
            deltaT = time.time() - writeStartTime
            entityWriteRate = batch.numWrites / deltaT
            print("Wrote %d entities at %.1f e/s" % (batch.numWrites, entityWriteRate))
    # Wait for subprocess to exit
    p.communicate()
    print("Wrote overall %d entities" % batch.numWrites)