def _parse_one_line(self, s):
    d = collections.OrderedDict()
    i = 0
    ll = []
    line_2 = ""
    buf = StringIO.StringIO(s)
    while True:
        line = buf.readline()
        if line:
            # Skip comment and blank lines.
            if line.startswith('*') or line == '' or line == '\n':
                continue
            # Continuation lines start with '+': merge them into the buffered line.
            if line.startswith('+'):
                line = utils.clean_string(line)
                line_2 = utils.clean_string(line_2)
                line = utils.merge_two_string(line_2, line)
                line_2 = line
                continue
            ll = self._check_element(line_2)
            d[i] = ll
            i = i + 1
            line_2 = line
        else:
            # End of buffer: flush the last buffered line.
            ll = self._check_element(line_2)
            d[i] = ll
            i = i + 1
            return d
def dialogok(self):
    '''helper to show an OK dialog with a message'''
    headertxt = clean_string(self.params.get("header", ""))
    bodytxt = clean_string(self.params.get("message", ""))
    dialog = xbmcgui.Dialog()
    dialog.ok(heading=headertxt, line1=bodytxt)
    del dialog
def parse_nodejsscan_file(threatplaybook, vul_result, project, target, scan, token):
    vul_dict = {
        'name': str(vul_result.get('title')),
        'tool': 'NodeJsScan',
        'description': str(vul_result.get('description')),
        'project': str(project),
        'target': str(target),
        'scan': str(scan),
        'cwe': int(vul_result.get('cwe', 0)),
        'observation': str(vul_result.get('observation', '')),
        'remediation': str(vul_result.get('remediation', ''))
    }
    create_vulnerability_query = create_vulnerability(vul_dict=vul_dict)
    if create_vulnerability_query:
        response = _post_query(threatplaybook=threatplaybook, token=token,
                               query=create_vulnerability_query)
        if response:
            cleaned_response_name = validate_vulnerability_response_name(content=response)
            vulnId = validate_vulnerability_response_id(content=response)
            evidence = {
                'name': str(clean_string('File: {}, Line no: {}'.format(
                    vul_result.get('path'), vul_result.get('line')))),
                'url': str(clean_string(vul_result.get('filename'))),
                'vulnId': str(vulnId),
                'log': str(clean_string(vul_result.get('lines'))),
            }
            create_evidence_query = create_evidence(evidence=evidence)
            if create_evidence_query:
                evidence_response = _post_query(threatplaybook=threatplaybook,
                                                token=token,
                                                query=create_evidence_query)
                if evidence_response:
                    cleaned_evidence_response = validate_evidence_response(
                        content=evidence_response)
                    if cleaned_evidence_response:
                        print('Evidence Created: {}'.format(cleaned_evidence_response))
                    else:
                        print('No Vulnerability Evidence')
            else:
                return {'error': 'Error while creating Vulnerability Evidence Query'}
            return {'success': cleaned_response_name}
        else:
            return {'error': 'Error while creating Vulnerability'}
    else:
        return {'error': 'Error while creating Vulnerability Query'}
def search_constructor(string):
    """Construct command-line output."""
    search_string = string.decode('utf-8')
    out_dict = search_feasts(search_string)
    if not out_dict['result']:
        return textwrap.fill(
            u'Ваш запрос — «%s» не найден!' % search_string,
            initial_indent=INDENT) + u'\n'
    date_formated = lambda x, month_arr: u"".join([
        unicode(x[0]), u' ', month_arr[x[1]], u' ', unicode(x[2]), u' г. '
    ])
    string_out = u""
    pattern = re.compile(ur"\{red\}|\{bold\}")
    pattern_substr = re.compile(ur"(\{red\}|\{bold\})(.+?)(\{end\})")
    for item in out_dict['result']:
        item[2] = clean_string(item[2])
        # Pretty highlighting output on terminal.
        sub_str_match = re.search(pattern_substr, item[2])
        sub_str = sub_str_match.group() if sub_str_match else u""
        match = re.search(pattern, sub_str)
        sub_str_replaced = sub_str.replace(
            u'{highlight_end}',
            u'{highlight_end}%s' % match.group() if match else u"{highlight_end}")
        item[2] = item[2].replace(sub_str, sub_str_replaced)
        # TODO: fix this ugly code in the future.
        string_out += \
            textwrap.fill(date_formated(item[1], MONTH_WORD) + u'по н. ст.',
                          initial_indent=INDENT) + u'\n' + \
            textwrap.fill(date_formated(item[0], MONTH_JULIAN_WORD) + u'от Адама.',
                          initial_indent=INDENT) + u'\n\n' + \
            textwrap.fill(clean_string(item[2].format(**DICT_FORMAT)).lstrip(),
                          width=60, initial_indent=INDENT,
                          subsequent_indent=INDENT) + u'\n\n\n'
    results_count = numeral.choose_plural(
        out_dict['count'],
        (u'найден {count} результат ',
         u'найденo {count} результата',
         u'найденo {count} результатов'))
    return (u"\n"
            u"%s"
            u"-----------------------------------------------------------"
            u"\n"
            u" По запросу «%s» %s"
            u"\n") % (string_out, search_string,
                      results_count.format(count=out_dict['count']))
def evaluate_message(text):
    '''Classify an incoming message by its cleaned text.'''
    cleaned = utils.clean_string(text)
    if cleaned.startswith('Team'):
        message_type = 'Team Submission'
    elif cleaned == 'Help':
        message_type = 'Help Request'
    elif cleaned == 'Standings':
        message_type = 'Standing Request'
    elif cleaned == 'Winning':
        message_type = 'Winning Request'
    elif cleaned == 'Loosing':
        message_type = 'Loosing Request'
    elif cleaned == 'Choices':
        message_type = 'Choices Request'
    elif cleaned == 'Fixtures':
        message_type = 'Fixture Request'
    elif cleaned == 'Position':
        message_type = 'Position Request'
    elif cleaned == 'Rules':
        message_type = 'Rules Request'
    else:
        message_type = 'Undefined'
    return message_type
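# Usage sketch for evaluate_message (hypothetical inputs; assumes
# utils.clean_string only trims surrounding whitespace):
#
#     evaluate_message(' Team = Arsenal ')  # -> 'Team Submission'
#     evaluate_message('Standings')         # -> 'Standing Request'
#     evaluate_message('anything else')     # -> 'Undefined'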
def dialogyesno(self):
    '''helper to show a YES/NO dialog with a message'''
    headertxt = clean_string(self.params.get("header", ""))
    bodytxt = clean_string(self.params.get("message", ""))
    yesactions = self.params.get("yesaction", "").split("|")
    noactions = self.params.get("noaction", "").split("|")
    if xbmcgui.Dialog().yesno(heading=headertxt, line1=bodytxt):
        for action in yesactions:
            xbmc.executebuiltin(action.encode("utf-8"))
    else:
        for action in noactions:
            xbmc.executebuiltin(action.encode("utf-8"))
def as_dict(event_xml):
    a = event_xml.find('lloc_simple/adreca_simple')
    coord = event_xml.find('lloc_simple/adreca_simple/coordenades/googleMaps')
    return {
        "name": clean_string(event_xml.find('nom').text),
        "place": clean_string(event_xml.find('lloc_simple/nom').text),
        "address": clean_string("%s, %s %s (%s)" % (a.find('carrer').text,
                                                    a.find('numero').text,
                                                    a.find('codi_postal').text,
                                                    a.find('municipi').text)),
        "neighborhood": clean_string(a.find('barri').text),
        "date": convert_or_raw(event_xml.find('data/data_proper_acte').text.split()[0],
                               time.strptime, "%d/%m/%Y"),
        "starts_at": convert_or_raw(event_xml.find('data/data_proper_acte').text.split()[1],
                                    time.strptime, "%H.%M"),
        "ends_at": convert_or_raw(event_xml.find('data/hora_fi').text,
                                  time.strptime, "%H.%M"),
        "lat": convert_or_raw(coord.get('lat'), float),
        "lon": convert_or_raw(coord.get('lon'), float)
    }
def process_column_by_levels(df_data, column, levels):
    i = 0
    for element in df_data[column]:
        element = unicode(utils.clean_string(element)).lower()
        aux = False
        for level in levels:
            level_clean = unicode(utils.clean_string(level)).lower()
            if element in level_clean:
                df_data.set_value(i, column, unicode(utils.clean_points(level)))
                aux = True
        if not aux:
            df_data.set_value(i, column, unicode("otra_" + str(column)))
        i += 1
    return 0
def query(self, query):
    if len(query) == 0:
        raise EmptyQueryStringException()
    q_string = self._raw_endpoint + str(query)
    response = self._sesh.get(q_string)
    if response.status_code != 200:
        raise NetworkErrorException(response.status_code)
    soup = BeautifulSoup(response.content, "html.parser")
    # parse the panels
    panels = soup.find(panel_table_filter)
    panels_list = []
    if panels is not None:
        for row in panels.find_all("tr"):
            cells = row.find_all("td")
            if len(cells) == 2:
                link = cells[0].a['href']
                genes = [clean_string(gene) for gene in cells[1].string.split()]
                panels_list.append(QuiverFushionPlexPanel(link, genes))
    # parse the fusions
    fusions = soup.find_all(fusion_table_filter)
    fusions_list = []
    if fusions is not None:
        for fusion in fusions:
            table = fusion.find('table')
            for row in table.find_all('tr'):
                cells = row.find_all('td')
                # fusion rows carry five cells: link, annotation, disease,
                # pubmed link and evidence count
                if len(cells) == 5:
                    link = cells[0].a['href']
                    original_annotation = clean_string(cells[1].string)
                    disease = cells[2].string.strip()
                    pubmed_link = cells[3].a['href']
                    evidence_count = int(cells[4].string)
                    fusions_list.append(QuiverGeneFushion(link, original_annotation,
                                                          disease, pubmed_link,
                                                          evidence_count))
    return QuiverResultSet(panels_list, fusions_list, query)
def clean_column(df_data, column, levels, binar):
    """Remove infrequent cases and replace them with "otros" in the
    original data column."""
    if binar:
        # The variable is binary.
        i = 0
        for element in df_data[column]:
            # Clean the element in every instance.
            element = unicode(utils.clean_string(element)).lower()
            if unicode("si") in element:
                df_data.set_value(i, column, u"si")
            else:
                df_data.set_value(i, column, u"no")
            i += 1
    else:
        # The variable is categorical.
        if column == "Actividad_Fisica":
            process_act_fisica(df_data, column)
        elif column == "Nacionalidad":
            process_nacionalidad(df_data, column)
        elif column == "Dieta_Alimenticia":
            process_dieta(df_data, column)
        else:
            process_column_by_levels(df_data, column, levels)
def FindRawSubjects(self, subject1, subject2, update=False):
    ret = False
    if subject1 is None or subject2 is None or subject1 == subject2:
        return ret
    with open(self.m_data_file_path, 'r') as inF:
        for line in inF:
            cleanLine = utils.clean_string(line)
            if not cleanLine.find(subject1) == -1 and not cleanLine.find(subject2) == -1:
                matchObj = re.match(self._REGEX_SUBJECT, line, flags=0)
                if matchObj:
                    user = matchObj.group()
                    if not user == "":
                        # Found a user for both subjects; update the dictionaries.
                        ret = True
                        if update:
                            if user not in self.m_user2subDic:
                                subjectsList = []
                                self.m_user2subDic[user] = subjectsList
                            self.m_user2subDic[user].append(subject1)
                            self.m_user2subDic[user].append(subject2)
                            self.__AddToSub2User(subject1, user)
                            self.__AddToSub2User(subject2, user)
    return ret
def get_steam_wishlist_game_names(steamid: str, sort: bool = True):
    url = f"https://store.steampowered.com/wishlist/profiles/{steamid}/wishlistdata/"
    game_names = []
    try:
        response = requests.get(url)
        response.raise_for_status()
        wishlist_infos = response.json()
        # Looks like: {'244850': {'name': 'Space Engineers', 'capsule': ...
        for key in wishlist_infos.keys():
            name = wishlist_infos[key]['name']
            name = clean_string(name)
            game_names.append(name)
    except (ReadTimeout, ConnectTimeout, HTTPError, Timeout, ConnectionError) as e:
        print("ERROR: get_steam_wishlist_game_names(...)")
        print(str(e))
    if sort:
        game_names.sort()
    return game_names
def scrape(self):
    page = self.lxmlize(COUNCIL_PAGE)
    councillors = page.xpath('//div[@class="article-content"]//td[@class="ms-rteTableOddCol-0"]')
    assert len(councillors), 'No councillors found'
    yield self.scrape_mayor(councillors[0])
    for councillor in councillors[1:]:
        if not councillor.xpath('.//a'):
            continue
        texts = [text for text in councillor.xpath('.//text()') if clean_string(text)]
        name = texts[0]
        district = texts[1]
        url = councillor.xpath('.//a/@href')[0]
        page = self.lxmlize(url)
        p = Person(primary_org='legislature', name=name, district=district,
                   role='Conseiller')
        p.add_source(COUNCIL_PAGE)
        p.add_source(url)
        p.image = councillor.xpath('./preceding-sibling::td//img/@src')[-1]
        contacts = page.xpath('.//td[@class="ms-rteTableOddCol-0"]//text()')
        for contact in contacts:
            if re.findall(r'[0-9]{4}', contact):
                phone = contact.strip().replace(' ', '-')
                p.add_contact('voice', phone, 'legislature')
        get_links(p, page.xpath('.//td[@class="ms-rteTableOddCol-0"]')[0])
        email = self.get_email(page)
        p.add_contact('email', email)
        yield p
def fetch_translations(self):
    response = self.request(self.url + "requests/?token={0}".format(self.token))
    if response[0].status == 200:
        etree = utils.element_tree(response)
        requests = etree.findall("resource")
        completed_requests = (r for r in requests if eval(r.findtext("ready")))
        in_progress_requests = set(
            [tr.external_id for tr in
             TranslationRequest.objects.filter(status=STATUS_IN_PROGRESS)]
        )
        for request in completed_requests:
            shortname = request.findtext("shortname")
            if shortname in in_progress_requests:
                trans_request = TranslationRequest.objects.get_by_external_id(shortname)
                response = self.request(
                    self.url + "results/{0}/?token={1}".format(shortname, self.token))
                etree = utils.element_tree(response)
                result = etree.findtext("result")
                result = re.sub("### (\[\[YAHOO_SPLITTER\]\]\n)?(### )?", "", result)
                result = re.sub("(<[A-Z]\[)?(\]>)?", "", result)
                result_sentences = [sentence.strip() for sentence in
                                    utils.clean_string(result.strip()).split("\n")]
                store = Store.objects.get(
                    translation_project=trans_request.translation_project)
                units = store.unit_set.all()
                if not len(units) == len(result_sentences):
                    trans_request.status = STATUS_ERROR
                    print "ERROR!"
                else:
                    for i in range(len(units)):
                        units[i].target = result_sentences[i]
                        units[i].state = pootle_store.util.FUZZY
                        units[i].save()
                    trans_request.status = STATUS_FINISHED
                trans_request.save()
    else:
        raise Exception(response[0].reason)
def get_filename(self, modus):
    lecturer = clean_string(self.lecturer.last_name)
    term = str(self.semester)
    if modus == 'participants':
        title = clean_string(self.title).replace(' ', '_')
        return 'Teilnehmer_%s_%s_%s.csv' % (title, lecturer, term)
    elif modus == 'csv':
        title = clean_string(self.title).replace(' ', '_')
        return 'Ergebnisse_%s_%s_%s.csv' % (title, lecturer, term)
    elif modus == 'pdf':
        title = clean_string(self.title).replace(' ', '_')
        # file ending will be added automatically in latex.py
        return 'Ergebnisse_%s_%s_%s' % (title, lecturer, term)
    elif modus == 'evaluation':
        return 'Evaluation %s (%s) %s' % (self.title, self.lecturer.last_name, term)
def get_query_params(raw_query):
    query = {}
    for pair in raw_query.split(","):
        key, value = [clean_string(s) for s in pair.split(":")]
        query[key] = query[key] + [value] if key in query else [value]
    return query
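# Usage sketch for get_query_params, assuming clean_string simply strips
# surrounding whitespace; repeated keys accumulate into one list:
#
#     get_query_params("tag: python, tag: parsing, lang: en")
#     # -> {'tag': ['python', 'parsing'], 'lang': ['en']}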
def get_mushroom_info(mushroom_id):
    """Retrieves the details for a mushroom"""
    dict_mushroom = {}
    url = os.path.join(_BASEPATH, _SUBPATH_MUSHROOM, '{}.html'.format(mushroom_id))
    # Get url content
    page = utils.get_html_content(url, _HEADERS)
    soup = BeautifulSoup(page, 'html.parser')
    # Find details section
    div = soup.find('div', id='contenido_2')
    # Find scientific name
    scientific_name = div.find('h3').get_text()
    dict_mushroom['scientific_name'] = utils.clean_string(scientific_name)
    # Find family and genre
    h4s = div.find_all('h4')
    family = h4s[0].get_text().split(':')[1]
    genre = h4s[1].get_text().split(':')[1]
    dict_mushroom['family'] = utils.clean_string(family)
    dict_mushroom['genre'] = utils.clean_string(genre)
    # Find other details
    ul = div.find('ul', class_='ficha')
    ps = ul.find_all('p')
    for idx_p, p in enumerate(ps):
        key, value = p.get_text().split(':', 1)
        key = utils.normalize_key(key)
        value = utils.clean_string(value)
        if key == '':
            if idx_p == 0:
                key = "alternative_scientific_names"
            else:
                continue
        dict_mushroom[key] = value
    return dict_mushroom
def as_dict(station_xml):
    return {
        "id": int(station_xml.find('id').text),
        "street": clean_string(BicingProcessor.HTML_PARSER.unescape(
            station_xml.find('street').text)),
        "lat": convert_or_raw(station_xml.find('lat').text, float),
        "lon": convert_or_raw(station_xml.find('long').text, float),
        "slots": convert_or_raw(station_xml.find('slots').text, int),
        "bikes": convert_or_raw(station_xml.find('bikes').text, int)
    }
def input_team(name, message):
    '''Record a team choice for the current round.'''
    round = utils.get_current_round()
    team = utils.clean_string(message.split('=')[1])
    query = "INSERT INTO choices (name, choice, round) VALUES ('{}', '{}', '{}');".format(
        name, team, round)
    return utils.input_sql(query)
def serialize(self):
    filters = {'language': self.language}
    if self.default_field:
        filters["default_field"] = self.default_field
        if not isinstance(self.default_field, (str, unicode)) and \
                isinstance(self.default_field, list):
            if not self.use_dis_max:
                filters["use_dis_max"] = self.use_dis_max
            if self.tie_breaker != 0:
                filters["tie_breaker"] = self.tie_breaker
    if self.search_fields:
        filters['fields'] = self.search_fields
    if self.default_operator != "OR":
        filters["default_operator"] = self.default_operator
    if self.options:
        filters["options"] = self.options
    if self.synonyms:
        filters["synonyms"] = self.synonyms
    if self.stopwords:
        filters["stopwords"] = self.stopwords
    if self.analyzer:
        filters["analyzer"] = self.analyzer
    if not self.allow_leading_wildcard:
        filters["allow_leading_wildcard"] = self.allow_leading_wildcard
    if not self.lowercase_expanded_terms:
        filters["lowercase_expanded_terms"] = self.lowercase_expanded_terms
    if not self.enable_position_increments:
        filters["enable_position_increments"] = self.enable_position_increments
    if self.fuzzy_prefix_length:
        filters["fuzzy_prefix_length"] = self.fuzzy_prefix_length
    if self.fuzzy_min_sim != 0.5:
        filters["fuzzy_min_sim"] = self.fuzzy_min_sim
    if self.phrase_slop:
        filters["phrase_slop"] = self.phrase_slop
    if self.min_concept_size != 2:
        filters["min_concept_size"] = self.min_concept_size
    if self.max_concept_size != 4:
        filters["max_concept_size"] = self.max_concept_size
    if self.term_expansion_steps != 1:
        filters["term_expansion_steps"] = self.term_expansion_steps
    if self.boost != 1.0:
        filters["boost"] = self.boost
    if self.clean_text:
        query = clean_string(self.text)
        if not query:
            raise InvalidQuery("The query is empty")
        filters["query"] = query
    else:
        if not self.text.strip():
            raise InvalidQuery("The query is empty")
        filters["query"] = self.text
    return {self._internal_name: filters}
def reconcile(book, bank, externalData):
    """
    Reconciles a book statement with a bank statement, supported by extra
    external data.

    Args:
        book: a Ledger object
        bank: a Ledger object
        externalData: extra data such as previous bank reconciliations
    Returns:
        Nothing
    Raises:
        Nothing
    """
    # Algorithm 1: One-to-one reconciliation
    ut.clean_columns(book.ledger)
    bankColumn = ut.clean_string('G/L Account Name')
    # Filter by bank
    bookByBank = Ledger(
        ledger=book.ledger.query('{} == "{}"'.format(bankColumn, bank.name)))
    pivotTable = Ledger(ledger=ut.toPivotTable(bookByBank.ledger))
    vendorPivotTable = Ledger(
        ledger=pivotTable.ledger.drop('Customer', axis=1).copy())
    customerPivotTable = Ledger(
        ledger=pivotTable.ledger.drop('Vendor', axis=1).copy())
    appendMatchColumn(vendorPivotTable)
    appendMatchColumn(customerPivotTable)
    appendMatchColumn(bank)
    oneToOneMatch(vendorPivotTable, bank, ut.clean_string('Vendor'),
                  ut.clean_string('Debit'))
    oneToOneMatch(customerPivotTable, bank, ut.clean_string('Customer'),
                  ut.clean_string('Credit'))
    # After getting all pivot table matches, map them back up to the book Ledger
    reversePivotMapping(book, bookByBank, vendorPivotTable, customerPivotTable)
    # Reflect changes in Excel
    highlightMatches(book)
    highlightMatches(bank)
    ut.newExcel(vendorPivotTable, 'VendorByDate.xlsx', 'Reconciliation')
    ut.newExcel(customerPivotTable, 'CustomerByDate.xlsx', 'Reconciliation')
    highlightMatches(vendorPivotTable)
    highlightMatches(customerPivotTable)
    return
def serialize(self):
    filters = {}
    if self.default_field:
        filters["default_field"] = self.default_field
        if not isinstance(self.default_field, (str, unicode)) and \
                isinstance(self.default_field, list):
            if not self.use_dis_max:
                filters["use_dis_max"] = self.use_dis_max
            if self.tie_breaker != 0:
                filters["tie_breaker"] = self.tie_breaker
    if self.default_operator != "OR":
        filters["default_operator"] = self.default_operator
    if self.analyzer:
        filters["analyzer"] = self.analyzer
    if not self.allow_leading_wildcard:
        filters["allow_leading_wildcard"] = self.allow_leading_wildcard
    if not self.lowercase_expanded_terms:
        filters["lowercase_expanded_terms"] = self.lowercase_expanded_terms
    if not self.enable_position_increments:
        filters["enable_position_increments"] = self.enable_position_increments
    if self.fuzzy_prefix_length:
        filters["fuzzy_prefix_length"] = self.fuzzy_prefix_length
    if self.fuzzy_min_sim != 0.5:
        filters["fuzzy_min_sim"] = self.fuzzy_min_sim
    if self.phrase_slop:
        filters["phrase_slop"] = self.phrase_slop
    if self.search_fields:
        if isinstance(self.search_fields, (str, unicode)):
            filters["fields"] = [self.search_fields]
        else:
            filters["fields"] = self.search_fields
        if len(filters["fields"]) > 1:
            if not self.use_dis_max:
                filters["use_dis_max"] = self.use_dis_max
            if self.tie_breaker != 0:
                filters["tie_breaker"] = self.tie_breaker
    if self.boost != 1.0:
        filters["boost"] = self.boost
    if self.analyze_wildcard:
        filters["analyze_wildcard"] = self.analyze_wildcard
    if self.clean_text:
        query = clean_string(self.query)
        if not query:
            raise InvalidQuery("The query is empty")
        filters["query"] = query
    else:
        if not self.query.strip():
            raise InvalidQuery("The query is empty")
        filters["query"] = self.query
    return {self._internal_name: filters}
def rename_project():
    user_record = is_logged_in()
    if not user_record:
        return redirect("/")
    project_id = request.form.get("project_id", type=str, default="ID?")
    project_name = request.form.get("project_name", type=str, default="ID?")
    project_name = utils.clean_string(project_name)
    project_record = mongo.db.projects.find_one({'_id': project_id})
    if project_record:
        project_record['project_name'] = project_name
        mongo.db.projects.save(project_record)
        flash('Project successfully renamed.')
    return redirect("/project")
def string_combinations(string, max_words):
    """
    Takes a string text and creates a list of all possible neighbouring strings.
    :param string: Input string
    :param max_words: Number of neighbouring strings
    :return: list
    """
    logger.info('')
    if max_words == 0:
        return None
    s = re.findall(r"[\w']+", string)
    r = combine(s, max_words)
    return [utils.clean_string(' '.join(a)) for a in r]
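# Usage sketch, assuming combine() yields every run of up to max_words
# neighbouring tokens (hypothetical behaviour inferred from the name):
#
#     string_combinations("big red dog", 2)
#     # -> ['big', 'red', 'dog', 'big red', 'red dog']  (order depends on combine())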
def reversePivotMapping(book, bookByBank, vendorPivotTable, customerPivotTable):
    """
    Maps matched values from the pivot table to its corresponding un-pivoted
    Ledger

    Args:
        book: the un-pivoted Ledger of pivotTable
        bookByBank: is necessary to identify book entries belonging to a
            specific bank
        vendorPivotTable: a pivoted Ledger object for vendors
        customerPivotTable: a pivoted Ledger object for customers
    Returns:
        Nothing - the changes are reflected in book
    Raises:
        Nothing
    """
    appendMatchColumn(book)
    # The pivots share the same indices
    indexValues = vendorPivotTable.ledger.index.values
    for pivotIndex in indexValues:
        vendorAmount = vendorPivotTable.ledger.at[pivotIndex,
                                                  ut.clean_string('Vendor')]
        customerAmount = customerPivotTable.ledger.at[pivotIndex,
                                                      ut.clean_string('Customer')]
        date = vendorPivotTable.ledger.at[pivotIndex,
                                          ut.clean_string('Posting Date')]
        rows = bookByBank.ledger[bookByBank.ledger[ut.clean_string(
            'Posting Date')].isin([date])]
        # Get the sum of all debits and credits, and check for matches
        amounts = rows[ut.clean_string('Amount')]
        debits = amounts[isDebit(amounts)]
        credits = amounts[isCredit(amounts)]
        if vendorAmount == debits.sum():
            for i in debits.index.values:
                book.ledger.at[i, 'Matches'] = 1
        if customerAmount == credits.sum():
            for i in credits.index.values:
                book.ledger.at[i, 'Matches'] = 1
    return
def filter_keywords(kw_dict, max_words=2, min_words=1):
    out = {}
    items = sorted(kw_dict.items(), key=lambda x: x[1], reverse=True)[:15]
    for keyword, occurrences in items:
        keyword = utils.clean_string(keyword)
        if max_words >= len(keyword.split()) >= min_words:
            if len(keyword) > 2:
                if is_word_checked(keyword):
                    out[keyword] = occurrences
    if len(out) > 0:
        return out
    else:
        return None
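# Usage sketch: keep at most the top 15 keywords of one or two words and more
# than two characters, assuming is_word_checked accepts the surviving key:
#
#     filter_keywords({'data science': 5, 'ai': 9, 'of': 30})
#     # -> {'data science': 5}  ('ai' and 'of' are only two characters long)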
def parse_npmaudit_file(threatplaybook, vul_result, project, target, scan, token):
    severity_dict = {'moderate': 2, 'low': 1, 'critical': 3}
    vul_dict = {
        'name': str(clean_string(vul_result.get('title'))),
        'tool': 'Npm Audit',
        'description': str(clean_string(vul_result.get('overview'))),
        'project': str(project),
        'target': str(target),
        'scan': str(scan),
        'cwe': int(vul_result.get('cwe', '').split('-')[-1]),
        'observation': str(clean_string(vul_result.get('observation'))),
        'severity': int(severity_dict.get(vul_result.get('severity'), 0)),
        'remediation': str(clean_string(vul_result.get('recommendation')))
    }
    create_vulnerability_query = create_vulnerability(vul_dict=vul_dict)
    if create_vulnerability_query:
        response = _post_query(threatplaybook=threatplaybook,
                               query=create_vulnerability_query, token=token)
        if response:
            cleaned_response_name = validate_vulnerability_response_name(content=response)
            vulnId = validate_vulnerability_response_id(content=response)
            for finding in vul_result.get('findings'):
                for path in finding.get('paths'):
                    evidence = {
                        'name': str(clean_string('File: {0}'.format(path))),
                        'url': str(clean_string(vul_result.get('module_name'))),
                        'vulnId': str(vulnId)
                    }
                    create_evidence_query = create_evidence(evidence=evidence)
                    if create_evidence_query:
                        evidence_response = _post_query(threatplaybook=threatplaybook,
                                                        query=create_evidence_query,
                                                        token=token)
                        if evidence_response:
                            cleaned_evidence_response = validate_evidence_response(
                                content=evidence_response)
                            if cleaned_evidence_response:
                                print('Evidence Created: {}'.format(
                                    cleaned_evidence_response))
                            else:
                                print('No Vulnerability Evidence')
                    else:
                        return {'error': 'Error while creating Vulnerability Evidence Query'}
            return {'success': cleaned_response_name}
        else:
            return {'error': 'Error while creating Vulnerability'}
    else:
        return {'error': 'Error while creating Vulnerability Query'}
def scrape(stats_file, pokedes_file):
    tr_elements = utils.getData('http://pokemondb.net/pokedex/all', '//tr')
    col = []
    # For each header cell in the first row, store its name and an empty list
    i = 0
    for t in tr_elements[0]:
        name = t.text_content()
        if name == '#':
            name = 'no'
        col.append((utils.clean_string(name), []))
        i += 1
    # Since our first row is the header, data is stored from the second row onwards
    for j in range(1, len(tr_elements)):
        T = tr_elements[j]
        if len(T) != 10:
            break
        i = 0
        for t in T.iterchildren():
            data = t.text_content()
            if i > 0:
                try:
                    data = int(data)
                except:
                    pass
            col[i][1].append(data)
            i += 1
    # Construct a DataFrame using pandas.
    Dict = {title: column for (title, column) in col}
    df = pd.DataFrame(Dict)
    # Apply clean-up
    df['name'] = df['name'].apply(utils.str_bracket)
    df['type'] = df['type'].apply(utils.str_break)
    df['img_filename'] = df['name']
    df['img_filename'] = df['img_filename'].apply(utils.generate_img_file_name)
    # Save to json
    df.to_json(stats_file, orient='records')
    # Save image_filename list
    utils.save_df_to_text(df, pokedes_file, 'img_filename')
def get_xbox_game_pass_game_names(sort: bool = True):
    # The following urls were determined from
    # https://www.xbox.com/en-US/xbox-game-pass/games?=pcgames
    # (use the browser dev tools to watch where the data is pulled from).
    game_ids_url = 'https://catalog.gamepass.com/sigls/v2?id=fdd9e2a7-0fee-49f6-ad69-4354098401ff&language=en-us&market=US'
    game_info_url = 'https://displaycatalog.mp.microsoft.com/v7.0/products?bigIds={}&market=US&languages=en-us&MS-CV=XXX'
    game_names = []
    try:
        game_ids = requests.get(game_ids_url).json()
        s = ','.join(i['id'] for i in game_ids if 'id' in i)
        data = requests.get(game_info_url.format(s)).json()
        for p in data['Products']:
            name = p['LocalizedProperties'][0]['ProductTitle']
            name = clean_string(name)
            # Strip known extraneous suffixes such as edition tags.
            for word in extraneous_trailing_strings:
                if name.endswith(word):
                    name = name.replace(word, '')
                    break
            game_names.append(name)
    except (ReadTimeout, ConnectTimeout, HTTPError, Timeout, ConnectionError) as e:
        print("ERROR: get_xbox_game_pass_game_names(...)")
        print(str(e))
    if sort:
        game_names.sort()
    return game_names
def process_nacionalidad(df_data, column):
    i = 0
    for element in df_data[column]:
        element = unicode(utils.clean_string(element)).lower()
        if unicode("chi") in element or \
                unicode("temuco") in element or \
                unicode("calama") in element or \
                unicode("chlena") in element or \
                unicode("chle") in element or \
                unicode("chulena") in element or \
                unicode("nacional") in element or \
                unicode("hileno") in element:
            df_data.set_value(i, column, u"chilena")
        elif unicode("colombiano") in element or \
                unicode("colom") in element:
            df_data.set_value(i, column, u"colombiano")
        elif unicode("peru") in element:
            df_data.set_value(i, column, u"peruano")
        elif unicode("argen") in element:
            df_data.set_value(i, column, u"argentina")
        elif unicode("bolivia") in element or \
                unicode("boli") in element:
            df_data.set_value(i, column, u"boliviana")
        elif unicode("alemana") in element:
            df_data.set_value(i, column, u"alemana")
        elif unicode("estado") in element:
            df_data.set_value(i, column, u"estadounidense")
        elif unicode("ecua") in element:
            df_data.set_value(i, column, u"ecuatoriana")
        elif unicode("vnezolano") in element or \
                unicode("vene") in element:
            df_data.set_value(i, column, u"venezolano")
        else:
            df_data.set_value(i, column, u"no especifica")
        i += 1
def process_act_fisica(df_data, column):
    i = 0
    for element in df_data[column]:
        # Clean the element in every instance.
        element = unicode(utils.clean_string(element)).lower()
        if unicode("1vezporsemana") in element:
            df_data.set_value(i, column, u"1 vez por semana")
        elif unicode("2vecesporsemana") in element:
            df_data.set_value(i, column, u"2 veces por semana")
        elif unicode("3vecesporsemana") in element:
            df_data.set_value(i, column, u"3 veces por semana")
        else:
            df_data.set_value(i, column, u"no")
        i += 1
def check_validity(name, message):
    '''Check that the player has not already chosen this team twice.'''
    team = utils.clean_string(message.split('=')[1])
    query = "SELECT COUNT(*) AS count FROM choices WHERE name = '{}' and choice = '{}'".format(
        name, team)
    df = utils.read_from_sql(query)
    times_chosen = df['count'][0]
    if times_chosen >= 2:
        validity = False
    else:
        validity = True
    return validity
def get_keywords(self, txt, url, language='en'):
    """
    Gets a text and returns a dictionary of keywords. It does this by
    removing the stopwords in the text and then counting the words.
    :param txt: String of text that we want to get keywords from.
    :param url: Url from which the text was fetched.
    :return: dict mapping each keyword to its number of occurrences.
    """
    logger.info(url + ':' + txt)
    outs = []
    swords = set(self.stopwords[language])
    for w in txt.split():
        w = utils.clean_string(w).lower()
        if w not in swords and len(w) > 1:
            outs.append(w)
    out = Counter(outs).most_common()
    d = {}
    for w, o in out:
        d[w] = o
    return d
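# Usage sketch for get_keywords, assuming clean_string strips punctuation and
# 'the'/'over' appear in the English stopword list (hypothetical contents);
# `crawler` stands in for whatever object carries self.stopwords:
#
#     crawler.get_keywords('The quick fox jumps over the quick hedge.',
#                          'http://example.com')
#     # -> {'quick': 2, 'fox': 1, 'jumps': 1, 'hedge': 1}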
def process_dieta(df_data, column):
    i = 0
    for element in df_data[column]:
        element = unicode(utils.clean_string(element)).lower()
        if unicode("vegetariano") in element:
            df_data.set_value(i, column, u"vegetariano")
        elif unicode("vegano") in element:
            df_data.set_value(i, column, u"vegano")
        elif unicode("omnivoro") in element or \
                unicode("normal") in element or \
                unicode("comodetodo") in element:
            df_data.set_value(i, column, u"omnivoro")
        else:
            df_data.set_value(i, column, u"otro")
        i += 1
def show(name, fname=None, timestamp=False):
    """ Shows plot and automatically saves after closing preview """
    if fname is None:
        fname = os.path.join(Plotter.plot_save_directory, utils.clean_string(name))
    # handle file suffix
    if not (fname.endswith('.png') or fname.endswith('.svg')):
        fname += '.png'
    log('Plotting "%s"' % fname)
    # add timestamp
    if timestamp:
        parts = os.path.splitext(fname)
        no_ext = parts[0]
        no_ext += '_%s' % utils.get_strtime()
        fname = '%s%s' % (no_ext, parts[1])
    # handle surrounding directory structure
    dire = os.path.dirname(fname)
    if len(dire) > 0 and not os.path.exists(dire):
        os.makedirs(dire)
    # fix confusing axis offset
    formatter = ptk.ScalarFormatter(useOffset=False)
    plt.gca().xaxis.set_major_formatter(formatter)
    plt.gca().yaxis.set_major_formatter(formatter)
    fig = plt.gcf()
    if Plotter.show_plots:
        plt.show()
    else:
        plt.close()
    # save plot
    fig.dpi = 250
    fig.savefig(fname, dpi=fig.dpi)
def get_extra_info(self, cee):
    print(f"Downloading extra info for {cee}")
    info = {}
    url = MOVISTAR_DESCRIPTION_URL + cee
    response = requests.get(url)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")
    info["image"] = soup.find("img", class_="img-v-detail")["src"]
    info["desc"] = soup.find("div", class_="sinopsis_large").find(
        text=True, recursive=False).strip()
    info["details"] = {}
    details = soup.find("div", class_="details_container").find_all("div")
    # Details come as alternating title/value <div> pairs.
    for i in range(0, len(details) - 1, 2):
        title = clean_string(details[i].text)
        value_item = details[i + 1].find("span", class_="details_value")
        content_list = value_item.find_all("span")
        if content_list:
            value = [v.text.strip() for v in content_list]
        else:
            value = value_item.text.strip()
        info["details"][title] = value
    info["age_rating"] = soup.find("span", class_="nivel_moral").text.strip()
    return info
def create_project():
    user_record = is_logged_in()
    if not user_record:
        return redirect("/")
    project_name = request.form.get("project_name", type=str,
                                    default="Exciting Project")
    project_name = utils.clean_string(project_name)
    project_url = utils.random_string(config.PROJECT_ID_LEN)
    try:
        aws.Bucket('swiftlatex').put_object(Key=project_url + '/manifest',
                                            Body='%s' % datetime.datetime.now())
    except:
        return abort(500)
    project = {}
    project['_id'] = project_url
    project['project_name'] = project_name
    project['author_id'] = user_record['_id']
    project['author_name'] = user_record['display_name']
    project['created_time'] = datetime.datetime.now()
    project['modified_time'] = datetime.datetime.now()
    mongo.db.projects.insert_one(project)
    user_record['project_list'].append(project_url)
    mongo.db.users.save(user_record)
    return redirect('/project/postcreate/%s/' % project_url)
def search_feasts(search_string):
    """
    Search saints and feasts in the year menology.

    :param search_string: search input string (e.g. saint or feast name).
    :return dict, which contains gregorian & julian dates and search results.
    """
    if len(search_string) < 3:
        return dict(count=0, result=[])
    # TODO: Fix endings in the words: Андрей, Андриан
    endings = re.compile(
        ur"(ый|ой|ая|ое|ые|ому|а|ем|о|ов|у|е|й|"
        ur"ого|ал|ему|и|ство|ых|ох|ия|ий|ь|я|он|ют|ат|ин|ея)$", re.U
    )
    search_string_clean = re.sub(endings, u"", search_string)
    pattern = re.compile(ur"(\b%s\w+\b)" % search_string_clean, re.I | re.U)
    if len(search_string) <= 5:
        endings = re.compile(ur"(а|ы|у|и|я)$", re.U)
        search_string_clean = re.sub(endings, u"", search_string)
        pattern = re.compile(ur"(\b%s[а-я]{0,2}\b)" % search_string_clean,
                             re.I | re.U)
    date = datetime.date.today()
    year = convert_gregorian_to_julian(date.day, date.month, date.year)[2]
    year_menology = menology
    cal_out = []
    # List of weekday names.
    for month in range(1, 13):
        days_in_month = calendar.monthrange(year, month)[1]
        for day in range(1, days_in_month + 1):
            cal = AncientCalendar(day, month, year, calendar="julian")
            weekdayname = cal.get_daily_feast()
            cal_out.extend([[list((day, month, year)), weekdayname]])
    # Construct year menology with movable feasts.
    for item in cal_out:
        year_menology[item[0][1]][item[0][0]]["weekday"] = item[1]
    d = year_menology
    out = []
    # Search string in menology.
    for _month, value in d.iteritems():
        for _day, value1 in d[_month].iteritems():
            for key2, content in d[_month][_day].iteritems():
                if re.search(pattern, unicode(content)):
                    _date = (_day, _month, year)
                    out.extend([[
                        list(convert_julian_day_to_julian_am(
                            convert_julian_to_julian_day(*_date))),
                        list(convert_julian_to_gregorian(*_date)),
                        clean_string(content),
                    ]])
    # Highlighting search_string.
    start_str = u"{highlight_start}"
    end_str = u"{highlight_end}"
    pattern_highlite = re.compile(ur"(\b%s\w*\b)" % search_string_clean,
                                  re.I | re.U)
    tr = lambda m: start_str + m.group() + end_str
    result = [[t[0], t[1], pattern_highlite.sub(tr, t[2])] for t in out]
    # Count of search results.
    count = len([re.findall(pattern, item[2]) for item in out])
    return dict(count=count, result=result)
def team_submission_text(name, text):
    '''Confirmation message sent after a team submission.'''
    return '''Hi {}, you have submitted {} as your team for this week!
If this is incorrect please redo your submission'''.format(
        name, utils.clean_string(text.split('=')[1]))
def fetch_translations(self):
    proxy = xmlrpclib.ServerProxy(self.url)
    # Fetch the translation requests (and make sure the result is a list).
    requests = utils.cast_to_list(proxy.list_requests(self.token))
    # Retrieve the translation requests that are "ready".
    completedRequests = [request for request in requests if request['ready']]
    # Process the completed requests.
    for completedRequest in completedRequests:
        # Get the result.
        result = proxy.list_results(self.token, completedRequest['request_id'])
        # Get the external request id and sentences.
        external_request_id = completedRequest['shortname']
        result_sentences = utils.clean_string(result['result'].strip()).split('\n')
        # FIXME: Add exception handling when the translation request is not found.
        try:
            # Look up the original translation request.
            request = TranslationRequest.objects.get_by_external_id(external_request_id)
            # TODO: Should we delete the translations, or filter them some way in
            # Serverland to prevent re-retrieving translations? This could get
            # expensive...
            if request.status == STATUS_FINISHED:
                # Only update if the translation isn't marked as finished.
                continue
            # Fetch the Pootle store for the corresponding translation project
            # and fill in the translations.
            store = Store.objects.get(translation_project=request.translation_project)
            # Get all of the units.
            units = store.unit_set.all()
            # If the sentence counts don't match, then we don't have a one-to-one
            # correspondence between sentences. Mark the translation request with
            # an error.
            if len(units) != len(result_sentences):
                request.status = STATUS_ERROR
                print "An error occurred with request %s" % request
            else:
                # Start adding the translations to Pootle (lazy).
                for i in range(0, len(units)):
                    units[i].target = result_sentences[i]
                    units[i].state = pootle_store.util.FUZZY
                    units[i].save()
                # Set the status of the current TranslationRequest as completed.
                request.status = STATUS_FINISHED
                request.save()
        except ObjectDoesNotExist as ex:
            pass
def parse_header(self, string):
    self.code = string
    access = "private"
    string_split = re.split("((?:public|private|protected)\s*:)+", string)
    access_re = re.compile("(?:(public|private|protected)\s*:)+")
    class_re = re.compile(
        "(?:template\s*\<\s*(?:class|typename)\s+(?P<template_type>[\w+])\s*\>)?"
        "(?P<class_or_struct>class|struct)\s+(?P<class_name>\w+)"
        "(?:\s*:\s*(?P<inherited_classes>[:\w,\s]+))?\s*\{"
    )
    # regular expression to match class methods in the form
    # <return type> <method_name>(<method_arguments>) [<const>] [<implemented>]
    method_re = re.compile(
        "(?:(?:template\s*\<\s*(?:class|typename)\s+(?P<template_type>[\w]+)\s*\>)|"
        "(?:(?P<virtual>virtual)[ \t\f\v]*)|(?:(?P<static>static)[ \t\f\v]*))?"
        "(?:(?P<method_const_return_type>const)[ \t\f\v]*)?"
        "(?:[ \t\f\v]*(?P<method_return_type>[<>^:\w&* \t\f\v]+?[&*\s]+))"
        "(?P<method_name>\w+[,()\s\w~*+=/*^!<>\[\]|&%-]*)\s*"
        "\((?P<method_arguments>(?:[\w=\"'.&\s*:\[\]]+\s*,?\s*)*)\)\s*"
        "(?P<const>const)?\s*(?P<implemented>{)?"
    )
    # regular expression to match special class methods such as the
    # constructor and destructor
    special_method_re = re.compile(
        "(?P<method_name>~?" + self.name + ")\s*"
        "\((?P<method_arguments>(?:[\w&\s*:]+[\[\]\w]+\s*,?\s*)*)\)\s*"
        "(?P<implemented>{)?"
    )
    for string in string_split:
        match = access_re.search(string)
        if match is not None:
            access = match.group(1)
            continue
        while True:
            class_match = class_re.search(string)
            if class_match is None:
                break
            class_or_struct = class_match.group("class_or_struct")
            struct = False
            if class_or_struct == "struct":
                struct = True
            class_name = class_match.group("class_name")
            inherited_classes = []
            if class_match.group("inherited_classes"):
                inherited_classes = [i.strip() for i in
                                     class_match.group("inherited_classes").split(",")]
            start = class_match.end() - 1
            end, output = parse_brackets(string[start:])
            string = string[:class_match.start()] + string[end:]
            cpp_class = CppClass(class_name, inherited_classes, struct, self)
            cpp_class.parse_header(output)
            if class_match.group("template_type") is not None:
                cpp_class.templated = True
                cpp_class.template_type = class_match.group("template_type")
            self.classes[access].append(cpp_class)
        method_matches = method_re.finditer(string)
        for match in method_matches:
            method_name = match.group("method_name").strip()
            method_return_type = match.group("method_return_type")
            if method_return_type is not None:
                method_return_type = method_return_type.strip()
            # deal with the regex incorrectly matching a constructor
            if method_return_type == "":
                continue
            method_arguments = []
            for argument in clean_string(match.group("method_arguments")).split(","):
                # deal with default arguments
                if "=" in argument:
                    argument = argument[:argument.find("=") - 1]
                method_arguments.append(argument.strip())
            cpp_method = CppMethod(method_name, method_arguments,
                                   method_return_type, self)
            if match.group("virtual") is not None:
                cpp_method.virtual = True
            if match.group("implemented") is not None:
                cpp_method.implemented = True
            if match.group("const") is not None:
                cpp_method.const = True
            if match.group("template_type") is not None:
                cpp_method.templated = True
                cpp_method.template_type = match.group("template_type")
            if match.group("method_const_return_type") is not None:
                cpp_method.const_return_type = True
            if match.group("static") is not None:
                cpp_method.static = True
            self.methods[access].append(cpp_method)
        special_method_matches = special_method_re.finditer(string)
        for match in special_method_matches:
            method_name = match.group("method_name").strip()
            method_arguments = [i.strip() for i in
                                match.group("method_arguments").split(",")]
            cpp_method = CppMethod(method_name, method_arguments, parent=self)
            if match.group("implemented") is not None:
                cpp_method.implemented = True
            self.methods[access].append(cpp_method)
def sentence_to_words(string):
    string = utils.clean_string(string.lower(), stripdot=True)
    split = string.split(' ')
    split = map(stemmer.lemmatize, split)
    return split
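# Usage sketch, assuming clean_string(..., stripdot=True) removes full stops
# and `stemmer` is a WordNet-style lemmatizer (hypothetical behaviour):
#
#     sentence_to_words('The dogs ran home.')
#     # -> ['the', 'dog', 'ran', 'home']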