def record_line(self, row, indices, status):
    '''
    Extract the occupation and state information from each line of input
    and store them in the dictionaries initialized in the class.
    This runs in O(1), since it only gets two items from the list
    (ignoring strip).
    '''
    if len(row) <= indices[-1]:
        print('Warning: skipping one record because of incorrect format')
    elif row[indices[0]] == status:
        self.status_num += 1
        occ_key, state_key = strip(row[indices[1]]), strip(row[indices[2]])
        self.occupation_dict[occ_key] = self.occupation_dict.get(occ_key, 0) + 1
        self.state_dict[state_key] = self.state_dict.get(state_key, 0) + 1
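# Self-contained sketch of the counting idiom used above (example data is
# hypothetical): dict.get(key, 0) + 1 increments a tally without raising
# KeyError the first time a key is seen.
occupation_dict = {}
for occ in ['ENGINEER', 'ANALYST', 'ENGINEER']:
    occupation_dict[occ] = occupation_dict.get(occ, 0) + 1
print(occupation_dict)  # {'ENGINEER': 2, 'ANALYST': 1}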
def _getSecurityInfoFromRow(row):
    return {
        'symbol': row.xpath('td[2]/a/text()')[0].strip(),
        'name': row.xpath('td[3]/a/text()')[0].strip(),
        'start_date': datetime.datetime.strptime(
            row.xpath('td[4]/text()')[0].strip(), '%d/%m/%Y'),
        # TODO: clarify the meaning of the below 2 fields
        'total_listing_quantity': int(utils.strip(row.xpath('td[5]/text()')[0], ' .,')),
        'total_listing_value': int(utils.strip(row.xpath('td[6]/text()')[0], ' .,')),
    }
def run(self, args):
    """Main API method."""
    self.flag = utils.strip(args['<flag>'])
    self.arg = utils.strip(args['<argument>'])
    actions = ('store', 'search', 'launch', 'run', 'config', 'help')
    for action in actions:
        if args.get(action):
            method_name = '{}_codepath'.format(action)
            method = getattr(self, method_name, None)
            if method:
                return method()
            else:
                raise ValueError('Unknown action : {}'.format(action))
def store_key(self, user, key, s):
    """
    Stores user key to the database.
    :param user:
    :param key:
    :param s: current DB session
    :return:
    """
    # Loading phase
    existing_key = None
    try:
        if self.merge or self.update_keys:
            existing_key = self.load_existing_key(key, s)
    except Exception as e:
        logger.warning('Exception: %s' % e)

    # Storing phase
    try:
        if existing_key is not None:
            existing_key.date_last_check = salch.func.now()
            s.merge(existing_key)
            return existing_key

        key_id = int(key['id'])
        key_raw = key['key']
        key_type, key_val = [utils.strip(x) for x in key_raw.split(' ', 1)]

        db_key = GitHubKey()
        db_key.id = key_id
        db_key.key_id = key_id
        db_key.key_type = key_type
        db_key.key_user_found = user.user_name
        db_key.key_user_id_found = user.user_id
        db_key.text_raw = key_raw

        if key_type == 'ssh-rsa':
            try:
                key_obj = utils.load_ssh_pubkey(key_raw)
                if isinstance(key_obj, RSAPublicKey):
                    db_key.key_size = key_obj.key_size
                    numbers = key_obj.public_numbers()
                    db_key.key_modulus_hex = '%x' % numbers.n
                    db_key.key_exponent = numbers.e
            except Exception as e:
                logger.info('Exception during processing the key[%s]: %s' % (key_type, e))

        s.add(db_key)
        return db_key

    except Exception as e:
        utils.silent_rollback(s)
        logger.warning('Exception during key store: %s' % e)
        return 1
def decrypt(text, key):
    '''
    Function -- decrypt
        decrypts cipher text by replacing letters in the text with letters
        in the alphabet based on key index
    parameters:
        text -- cipher text string
        key -- plain text string of len 26
    returns decrypted plain text version of the cipher text
    '''
    # check validity of the inputs
    if key == '' or text == '':
        raise ValueError('both text and key values must be given')
    if not isinstance(key, str):
        raise TypeError('key must be a string')
    if not isinstance(text, str):
        raise TypeError('text must be a string')
    try:
        text = utils.strip(text)
        text = utils.process_text(text)
        text = utils.latin_caps(text)
        key = utils.strip(key)
        key = utils.process_text(key)
        key = utils.latin_caps(key)
    except ValueError:
        raise ValueError('text and key must only contain valid letters')
    if not utils.check_full(key):
        raise ValueError('key must contain only each letter of alphabet once')
    # alphabet and cipher
    alphabet_str = string.ascii_uppercase
    alphabet = list(alphabet_str)
    plain = ''
    # iterate through text and replace letter by key index
    for letter in text:
        if letter in alphabet:
            plain = plain + alphabet[key.index(letter)]
        else:
            raise ValueError('text and key must only contain valid letters')
    return plain
def setup(self, request, *args, **kwargs):
    super(CreateView, self).setup(request, *args, **kwargs)
    self.params.read_values(
        post_params={
            'add_note': utils.validate_yes,
            'age': utils.validate_age,
            'alternate_family_names': utils.strip,
            'alternate_given_names': utils.strip,
            'author_email': utils.strip,
            'author_made_contact': utils.validate_yes,
            'author_name': utils.strip,
            'author_phone': utils.strip,
            'users_own_email': utils.strip,
            'users_own_phone': utils.strip,
            'clone': utils.validate_yes,
            'description': utils.strip,
            'email_of_found_person': utils.strip,
            'family_name': utils.strip,
            'given_name': utils.strip,
            'home_city': utils.strip,
            'home_country': utils.strip,
            'home_neighborhood': utils.strip,
            'home_postal_code': utils.strip,
            'home_state': utils.strip,
            'last_known_location': utils.strip,
            'note_photo_url': utils.strip,
            'phone_of_found_person': utils.strip,
            'photo_url': utils.strip,
            'referrer': utils.strip,
            'sex': utils.validate_sex,
            'source_date': utils.strip,
            'source_name': utils.strip,
            'source_url': utils.strip,
            'status': utils.validate_status,
            'text': utils.strip,
            'own_info': utils.validate_yes,
        },
        file_params={
            'note_photo': utils.validate_django_image,
            'photo': utils.validate_django_image,
        })
    if request.method == 'POST':
        profile_urls = []
        profile_field_index = 0
        # 100 profile URLs should be enough for anyone.
        for profile_field_index in range(100):
            field_key = 'profile-url-%d' % profile_field_index
            if field_key in self.request.POST:
                profile_urls.append(
                    utils.strip(self.request.POST[field_key]))
            else:
                break
        if profile_urls:
            self.params['profile_urls'] = profile_urls
def setup():
    strip = utils.strip(32, 21, 22)
    rainbowFill(strip)
    time.clock()
    # test sliding the contents of the strip
    while time.clock() < 2:
        strip.slide(1)
        strip.post()
        time.sleep(1.0 / 16)
    pass
def login():
    if request.method == "POST":
        email = strip(request.form["email"])
        logging.info("email: {}".format(email))
        if not email:
            return jsonify({"error": True, "msg": u"请输入邮箱"})  # "Please enter an email address"
        password = strip(request.form["password"])
        logging.info("password: {}".format(password))
        if not password:
            return jsonify({"error": True, "msg": u"请输入密码"})  # "Please enter a password"
        if not User().email_is_existed(email):
            return jsonify({"error": True, "msg": u"邮箱不存在"})  # "Email does not exist"
        if not User().verify_password(email, password):
            return jsonify({"error": True, "msg": u"密码不正确"})  # "Incorrect password"
        # Login success
        session["email"] = email
        session["username"] = User.get_username(email)
        return jsonify({"error": False, "redirect": "/backend"})
    return render_template('/frontend/login.html')
def collect_page(self, page):
    # navigate to page
    self.browser.get('https://www.facebook.com/' + page + '/')

    # Scroll down depth-times and wait delay seconds to load
    # between scrolls
    for scroll in range(self.depth):
        # Scroll down to bottom
        self.browser.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);")
        # Wait to load page
        time.sleep(self.delay)

    # Once the full page is loaded, we can start scraping
    links = self.browser.find_elements_by_link_text("See more")
    for link in links:
        link.click()

    posts = self.browser.find_elements_by_class_name("userContentWrapper")
    poster_names = self.browser.find_elements_by_xpath(
        "//a[@data-hovercard-referer]")

    for count, post in enumerate(posts):
        # Creating first CSV row entry with the poster name (eg. "Donald Trump")
        analysis = [poster_names[count].text]

        # Creating a time entry.
        time_element = post.find_element_by_css_selector("abbr")
        utime = time_element.get_attribute("data-utime")
        analysis.append(utime)

        # Creating post text entry
        text = post.find_element_by_class_name("userContent").text
        status = utils.strip(text)
        analysis.append(status)

        # Write row to csv
        utils.write_to_csv(self.out_file, analysis)

    with open('post_csv.csv', 'w+', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file, delimiter=";")
        writer.writerow('my_utf8_string')
def data_split(dataset_dir, train_percent, valid_percent, filelists_dir,
               train_file, valid_file):
    if not pt.exists(dataset_dir):
        print(f"Error: Dataset directory '{dataset_dir}' does not exist.")
        return  # nothing to split if the dataset directory is missing

    utils.ensure_dirs(filelists_dir)

    dataset_file = pt.join(dataset_dir, "metadata.csv")
    wavs_dir = pt.join(dataset_dir, "wavs")
    train_filepath = pt.join(filelists_dir, train_file)
    valid_filepath = pt.join(filelists_dir, valid_file)

    data = [l for l in open(dataset_file, "r")]
    train_file = open(train_filepath, "w")
    valid_file = open(valid_filepath, "w")

    num_of_data = len(data)
    num_train = int((train_percent / 100.0) * num_of_data)
    num_valid = int((valid_percent / 100.0) * num_of_data)
    data_fractions = [num_train, num_valid]
    split_data = [[], [], []]
    rand_data_ind = 0

    for split_ind, fraction in enumerate(data_fractions):
        for _i in range(fraction):
            rand_data_ind = random.randint(0, len(data) - 1)
            file, text = data[rand_data_ind].split("|")[:2]
            file = utils.strip(pt.basename(file), ".wav")
            l = pt.join(wavs_dir, file + ".wav") + "|" + text.strip() + "\n"
            split_data[split_ind].append(l)
            data.pop(rand_data_ind)

    for l in split_data[0]:
        train_file.write(l)
    for l in split_data[1]:
        valid_file.write(l)

    train_file.close()
    valid_file.close()
def generate_sentences(sample_ratio=0.5):
    assert .0 < sample_ratio < 1.
    while True:
        files = [open(corpus, 'r') for corpus in CORPUS_LIST]
        files_count = len(files)
        assert len(files) > 0, "no "
        cont_empty_line_count = 0
        for file in cycle(files):
            line = file.readline()
            if not line:
                cont_empty_line_count += 1
                if cont_empty_line_count > files_count * 2:
                    break
                continue
            cont_empty_line_count = 0
            line = strip(line, '\n\t ')
            if not line:
                continue
            for piece in split_to_pieces(line):
                if random.random() <= sample_ratio:
                    yield piece
async def parse_list(page):
    x_query = '//form[@name="transparentForm"]//table[thead]/tbody/tr'
    await page.waitForXPath(x_query)
    rows = await page.xpath(x_query)
    for row in rows:
        d = {
            "code": strip(await (await (first(await row.xpath('./td[1]')).getProperty('textContent'))).jsonValue()),
            "name": strip(await (await (first(await row.xpath('./td[2]')).getProperty('textContent'))).jsonValue()),
            "drug_type": strip(
                await (await (first(await row.xpath('./td[3]')).getProperty('textContent'))).jsonValue()),
            "apply_type": strip(
                await (await (first(await row.xpath('./td[4]')).getProperty('textContent'))).jsonValue()),
            "reg_type": strip(await (await (first(await row.xpath('./td[5]')).getProperty('textContent'))).jsonValue()),
            "pharm_name": strip(
                await (await (first(await row.xpath('./td[6]')).getProperty('textContent'))).jsonValue()),
            "accept_date": strip(
                await (await (first(await row.xpath('./td[7]')).getProperty('textContent'))).jsonValue()) or None,
        }
        await process_item(d)
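# The snippet above relies on a first() helper that is not shown here.
# A minimal sketch of what it is assumed to do (hypothetical; the real
# project may handle empty XPath results differently):
def first(items):
    # Return the first element handle from an XPath result list, or None
    # if the query matched nothing.
    return items[0] if items else None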
def decrypt(text, key):
    '''
    Function -- decrypt
        decrypts cipher text by shifting letter indices to the left in the
        alphabet per the key (reversed if negative)
    parameters:
        text -- cipher text string
        key -- integer indicating magnitude and direction of index shift
    returns decrypted plain text version of the cipher text
    '''
    # check validity of inputs
    if key == '' or text == '':
        raise ValueError('both text and key values must be given')
    if not isinstance(key, int):
        raise TypeError('key must be an integer')
    if not isinstance(text, str):
        raise TypeError('text must be a string')
    try:
        text = utils.strip(text)
        text = utils.process_text(text)
    except ValueError:
        raise ValueError('text must only contain latin letters')
    key = key % 26
    # decrypt the string
    plain = ''
    a_index = list(range(0, 26))
    for x in text:
        if ord(x) >= 65 and ord(x) <= 90:
            plain += chr(a_index[((ord(x) - 65) - key) % 26] + 65)
        else:
            raise ValueError('text must only contain latin letters')
    return plain
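# Self-contained sketch of the shift arithmetic used above (hypothetical
# example input; it does not depend on the utils helpers):
def caesar_shift_left(text, key):
    # Shift uppercase Latin letters key positions to the left, wrapping A-Z.
    return ''.join(chr((ord(c) - 65 - key) % 26 + 65) for c in text)

print(caesar_shift_left('KHOOR', 3))  # HELLO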
def _getQuoteFromRow(row):
    return {
        'date': datetime.datetime.strptime(
            row.xpath('td[1]/text()')[0].strip(), '%d/%m/%Y'),
        'opening_price': int(utils.strip(row.xpath('td[3]/text()')[0], ' ,')) * 10,
        'max_price': int(utils.strip(row.xpath('td[4]/text()')[0], ' ,')) * 10,
        'min_price': int(utils.strip(row.xpath('td[5]/text()')[0], ' ,')) * 10,
        'closing_price': int(utils.strip(row.xpath('td[6]/text()')[0], ' ,')) * 10,
        'avg_price': int(utils.strip(row.xpath('td[7]/text()')[0], ' ,')) * 10,
        'volume': int(utils.strip(row.xpath('td[9]/text()')[0], ' .')),
    }
def price_from_containers(containers):
    # Given javascript containers, parses all the prices in each container.
    for container in containers:
        raw_prices = container.find_all('div', {"class": "avail-fare-price"})
        prices = [float(strip(price)) for price in raw_prices]
        yield prices
def test_strip(self):
    assert utils.strip(' ') == ''
    assert utils.strip(u' ') == u''
    assert utils.strip(' x ') == 'x'
    assert utils.strip(u' x ') == u'x'
    raises(Exception, utils.strip, None)
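# The tests above, together with the two-argument calls elsewhere in this
# file (e.g. utils.strip(value, ' .,')), are consistent with a thin wrapper
# around str.strip. A minimal sketch of such a helper (an assumption, not
# the project's actual implementation):
def strip(value, chars=None):
    # Trim whitespace by default, or the given characters from both ends.
    # Passing None raises AttributeError, matching the failing test case.
    return value.strip(chars)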
from utils import retag, conditional_join, strip

crfpp_prefix = "../bin/"
template_path = "../train.template"
grobid_data_path = "../data/grobid/grobid.tagged.txt"
cora_data_path = "../data/cora/cora.tagged.txt"
ms_data_path = "/tmp2/b02902030/Engdir"
#ms_data_path = "/tmp2/KIE/PDFs"
test_path = "../data/test.dat"
pred_path = "../data/pred.dat"
train_path = "../data/train.dat"

if __name__ == "__main__":
    dataset2 = Grobid.Grobid(grobid_data_path)  # totally 500 articles
    dataset1 = MicrosoftAcademicGraph.MicrosoftAcademicGraph(ms_data_path)  # totally 500 articles
    model2 = Crfpp.Crfpp(crfpp_prefix, template_path)
    model1 = NaiveBayes.NaiveBayes()

    dataset1.extract_crf_features(train_path, first_n_token = 70)
    dataset2.extract_crf_features(test_path)
    strip(train_path)
    strip(test_path)

    model2.fit(train_path)
    model2.predict(test_path, pred_path, last_feature = False)

    print "Author Precision / Title Precision / Author Recall / Title Recall"
    print model2.score(test_path, pred_path)
def parse_nyt(): url = "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=stock+market+crash+&begin_date=20070101&end_date=20090101&api-key=318a69b2af97848f66071cb4c1fdc831:15:69992102" response = urlopen(url).read() response = json.loads(response) print "Got response from nytimes" articleContent = [] i = 0 page = 1 hits = response["response"]["meta"]["hits"] while i<51 and page<(hits/10): print 'Getting response for page',page url = "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=stock+market+crash+&begin_date=20070101&end_date=20090101&page="+str(page)+"&api-key=318a69b2af97848f66071cb4c1fdc831:15:69992102" try: response = urlopen(url).read() response = json.loads(response) for article in response["response"]["docs"]: if random.randint(0,3) == 3: #1/3 probability print article["web_url"] soup1 = BeautifulSoup(utils.getData(article["web_url"])) soup = soup1.findAll("p",{"itemprop": "articleBody"}) if soup == None or len(soup) == 0: soup = soup1.find("div", {"id": "articleBody"}) if soup!=None: soup = soup.findAll("p") if soup == None or len(soup)==0: soup = soup1.find("div", {"class": "articleBody"}) if soup!=None: soup = soup.findAll("p") if soup!=None and len(soup)>0: if article["word_count"]>200 and article["lead_paragraph"]!=None: articleContent.append({}) articleContent[i]["abstract"] = article["abstract"] articleContent[i]["pub_date"] = article["pub_date"] articleContent[i]["headline"] = article["headline"]["main"] articleContent[i]["keywords"] = article["keywords"] articleContent[i]["lead_paragraph"] = article["lead_paragraph"] articleContent[i]["web_url"] = article["web_url"] articleContent[i]["id"] = article["_id"] articleContent[i]["word_count"] = article["word_count"] keywords = "" keywords = getMultiples(article["keywords"],"value") # should probably pull these if/else checks into a module # variables = [article["pub_date"], keywords, str(article["headline"]["main"]) if "main" in article["headline"].keys() else "", str(article["source"]) if "source" in article.keys() else "", str(article["document_type"]) if "document_type" in article.keys() else "", article["web_url"] if "web_url" in article.keys() else "",str(article["news_desk"]) if "news_desk" in article.keys() else "",str(article["section_name"]) if "section_name" in article.keys() else "",str(article["lead_paragraph"]).replace("\n","") if "lead_paragraph" in article.keys() else ""] # line = "\t".join(variables) # articleContent[i]["text"] = line sent = "" if type(soup) is not str: sent = " ".join([str(word) for word in soup]) else: sent = soup articleContent[i]["text"] = utils.strip(sent) print articleContent[i]["headline"],article["keywords"],article["lead_paragraph"] i+=1 print 'Extracted',i,article["pub_date"] if i>51: break except: print "Skipped" page+=1 print "Articles Extracted",i return articleContent
def _getNumber(string):
    return int(utils.strip(string.split(',')[0], '.,'))
        for i in range(1, m.lastoutputlineno+1):
            if m.outputtoinput.has_key(i):
                os.write(fd, "%d %d\n" % (i, m.outputtoinput[i]))
                transforms += 1
    except ParseError as e:
        print "\nParse error (%s) while performing %s->%s:\n" % (str(e), moduleNames[index-1] if index>0 else '', moduleName)
        print "%s%s%s" % (utils.colors.YELLOW, utils.snippet(sourcecode, Merger.getLineNumber(e), Merger.getColumnNumber(e), 5, True), utils.colors.NO)
        sys.exit(1)
    except module.ModuleError as e:
        print "Module error (%s) while performing %s->%s.\n" % (str(e), moduleNames[index-1] if index>0 else '', moduleName)
        sys.exit(1)
    except KeyboardInterrupt as e:
        #print "\nInterrupted by user.\n"
        sys.exit(1)
    except ImportError as e:
        print "Import error (%s),\nplease re-install the tool.\n" % str(e)
        sys.exit(1)
    except:
        print "\nError while performing %s->%s:\n" % (moduleNames[index-1] if index>0 else '', moduleName)
        #print "ERROR: %s */\n" % sys.exc_info()[0]
        traceback.print_exc(file=sys.stdout)
        print "\n\n parsing coords: <%s>" % (m.Parser.currentInputCoord)
        sys.exit(1)

    print utils.strip(sourcecode)

if __name__ == "__main__":
    main(sys.argv[0:])
def __iter__(self):
    for row in self.reader:
        yield strip(row)
def encrypt(text, key):
    '''
    Function -- encrypt
        encrypts plain text by writing it in a zigzag pattern across the
        given number of rails and reading the rails off in order
    parameters:
        text -- plain text string
        key -- integer indicating number of rails
    returns encrypted string cipher text version of the plain text
    '''
    # check validity of the inputs
    if key == '' or text == '':
        raise ValueError('both text and key values must be given')
    if not isinstance(key, int):
        raise TypeError('key must be an integer')
    if key < 2:
        raise ValueError('key must be at least 2')
    if not isinstance(text, str):
        raise TypeError('text must be a string')
    try:
        text = utils.strip(text)
        text = utils.process_text(text)
    except ValueError:
        raise ValueError('text must only contain valid letters')
    key = int(key)
    cipher = ''
    # create rail matrix
    matrix = []
    for i in range(key):
        matrix.append([])
    # fill it up
    cycle = len(matrix) - 1
    prev_rail = 0
    current_rail = 0
    for x in text:
        if current_rail - prev_rail == 0:
            matrix[current_rail].append(x)
            current_rail += 1
        elif current_rail - prev_rail > 0:
            matrix[current_rail].append(x)
            if current_rail >= cycle:
                current_rail -= 1
                prev_rail += 1
            else:
                current_rail += 1
                prev_rail += 1
        elif current_rail - prev_rail < 0:
            matrix[current_rail].append(x)
            if current_rail <= 0:
                current_rail += 1
                prev_rail -= 1
            else:
                current_rail -= 1
                prev_rail -= 1
    # get cipher text
    for level in matrix:
        cipher += ''.join(level)
    return cipher
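# A compact, self-contained reference sketch of the same rail fence pattern
# (hypothetical example input; no utils helpers needed). Each letter lands on
# a rail index that bounces between 0 and key - 1:
def rail_fence_reference(text, key):
    rails = [''] * key
    rail, step = 0, 1
    for ch in text:
        rails[rail] += ch
        if rail == 0:
            step = 1
        elif rail == key - 1:
            step = -1
        rail += step
    return ''.join(rails)

print(rail_fence_reference('WEAREDISCOVERED', 3))  # WECRERDSOEEAIVD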
def decrypt(text, key): ''' Function -- decrypt decrypts cipher text putting letters into matrices indicating length and arrangement of rails to generate plain text paramters: text -- cipher text string key -- integer indicating number of rails returns decrypted string plain text version of the cipher text ''' # check validity of the inputs if key == '' or text == '': raise ValueError('both text and key values must be given') if not isinstance(key, int): raise TypeError('key must be an integer') if key < 2: raise ValueError('key must be at least 2') if not isinstance(text, str): raise TypeError('text must be a string') try: text = utils.strip(text) text = utils.process_text(text) except ValueError: raise ValueError('text must only contain valid letters') key = int(key) # decrypt the string plain = '' # create rail matrix matrix = [] for i in range(key): matrix.append([]) # find rail lens total = len(text) cycle = (key * 2) - 2 rail_lens = [] if total % cycle == 0: top = total / cycle bot = top mid = 2 * top # add top rail_lens.append([top]) # add middle rows for x in range(key - 2): rail_lens.append([mid]) # add bottom rail_lens.append([bot]) else: top = (total // cycle) bot = top mid = 2 * top # add top rail_lens.append([top + 1]) # add middle rows for x in range(key - 2): rail_lens.append([mid]) # add bottom and leftovers rail_lens.append([bot]) left_over = int(total % cycle) for i in range(1, left_over): if i <= key - 1: rail_lens[i].append(1) else: rail_lens[cycle - i].append(1) for i in range(len(rail_lens)): rail_lens[i] = [sum(rail_lens[i])] # generate matrix base = 0 for i in range(len(rail_lens)): for j in range(base, rail_lens[i][0] + base): matrix[i].append(text[j]) base = base + rail_lens[i][0] # build plain text prev_rail = 0 current_rail = 0 for letter in range(total): if current_rail - prev_rail == 0: plain = plain + matrix[current_rail].pop(0) current_rail += 1 elif current_rail - prev_rail > 0: plain = plain + matrix[current_rail].pop(0) if current_rail >= key - 1: current_rail -= 1 prev_rail += 1 else: current_rail += 1 prev_rail += 1 elif current_rail - prev_rail < 0: plain = plain + matrix[current_rail].pop(0) if current_rail <= 0: current_rail += 1 prev_rail -= 1 else: current_rail -= 1 prev_rail -= 1 return plain
def decrypt(text, key): ''' Function -- decrypt decrypts cipher text by generating a playfair square and reversing the cipher paramters: text -- cipher text string key -- string to help make the 5 x 5 encryption matrix returns decrypted plain text version of the cipher text ''' # check validity of the inputs if key == '' or text == '': raise ValueError('both text and key values must be given') if not isinstance(key, str): raise TypeError('key must be a string') if not isinstance(text, str): raise TypeError('text must be a string') try: text = utils.strip(text) text = utils.process_text(text) text = utils.latin_caps(text) key = utils.strip(key) key = utils.process_text(key) key = utils.latin_caps(key) except ValueError: raise ValueError('text and key must only contain valid letters') if len(text) % 2 != 0: text = text + 'X' # remove repeats in the key and other letters keys = [] for letter in key: if letter not in keys: keys.append(letter) keys = [letter if letter != 'J' else 'I' for letter in keys] leftover_letters = list(map(chr, range(65, 91))) leftover_letters.remove('J') for letter in keys: if letter in leftover_letters: leftover_letters.remove(letter) # generate the cipher square matrix = [[j for j in range(5)] for i in range(5)] for row in range(len(matrix)): for col in range(len(matrix[row])): if len(keys) > 0: matrix[row][col] = keys.pop(0) else: matrix[row][col] = leftover_letters.pop(0) # process the cipher text (add Xs for duplicates and odd len words) letters = [char if char != 'J' else 'I' for char in text] if len(letters) % 2 != 0: letters.append('X') paired = [letters[i] + letters[i + 1] for i in range(0, len(letters), 2)] pairs = [] for i in range(len(paired)): if paired[i][0] == paired[i][1]: paired[i] = paired[i][0] + 'X' # get letter positions in the matrix plain = '' pos_1 = [0, 0] pos_2 = [0, 0] for pair in paired: for row in range(len(matrix)): if pair[0] in matrix[row]: for col in range(len(matrix[row])): if matrix[row][col] == pair[0]: pos_1[0] = row pos_1[1] = col if pair[1] in matrix[row]: for col in range(len(matrix[row])): if matrix[row][col] == pair[1]: pos_2[0] = row pos_2[1] = col # decrypt if pos_1[0] == pos_2[0]: plain += matrix[pos_1[0]][(pos_1[1] - 1)] plain += matrix[pos_2[0]][(pos_2[1] - 1)] elif pos_1[1] == pos_2[1]: plain += matrix[(pos_1[0] - 1)][pos_1[1]] plain += matrix[(pos_2[0] - 1)][pos_2[1]] else: plain += matrix[pos_1[0]][pos_2[1]] plain += matrix[pos_2[0]][pos_1[1]] return plain
def is_challenge_response(data):
    '''
    Check if the challenge response received from the USB device is valid.

    :param data: the raw data
    '''
    return utils.strip(data) == packets.CHALLENGE_RESPONSE
def encrypt(text, key):
    '''
    Function -- encrypt
        encrypts plain text by generating a 5 x 5 playfair square from the
        key and substituting letter pairs according to their positions in
        the square
    parameters:
        text -- plain text string
        key -- string used to build the 5 x 5 encryption matrix
    returns encrypted cipher text version of the plain text
    '''
    # check validity of the inputs
    if key == '' or text == '':
        raise ValueError('both text and key values must be given')
    if not isinstance(key, str):
        raise TypeError('key must be a string')
    if not isinstance(text, str):
        raise TypeError('text must be a string')
    try:
        text = utils.strip(text)
        text = utils.process_text(text)
        text = utils.latin_caps(text)
        key = utils.strip(key)
        key = utils.process_text(key)
        key = utils.latin_caps(key)
    except ValueError:
        raise ValueError('text and key must only contain valid letters')
    # remove repeats in the key and other letters
    keys = []
    for letter in key:
        if letter not in keys:
            keys.append(letter)
    keys = [letter if letter != 'J' else 'I' for letter in keys]
    leftover_letters = list(map(chr, range(65, 91)))
    leftover_letters.remove('J')
    for letter in keys:
        if letter in leftover_letters:
            leftover_letters.remove(letter)
    # generate the cipher square
    matrix = [[j for j in range(5)] for i in range(5)]
    for row in range(len(matrix)):
        for col in range(len(matrix[row])):
            if len(keys) > 0:
                matrix[row][col] = keys.pop(0)
            else:
                matrix[row][col] = leftover_letters.pop(0)
    # process the plain text (add Xs for duplicates and odd len words)
    first = []
    letters = [char for char in text]
    for i in range(len(letters)):
        if letters[i] == 'J':
            letters[i] = 'I'
        if len(first) == 0:
            first.append(letters[i])
        elif letters[i] != first[-1]:
            first.append(letters[i])
        elif len(first) % 2 == 1:
            first.append('X')
        else:
            first.append(letters[i])
    if len(first) % 2 != 0:
        first.append('X')
    text = ''.join(letters)
    paired = [first[i] + first[i + 1] for i in range(0, len(first), 2)]
    # get letter positions in the matrix
    cipher = ''
    pos_1 = [0, 0]
    pos_2 = [0, 0]
    for pair in paired:
        for row in range(len(matrix)):
            if pair[0] in matrix[row]:
                for col in range(len(matrix[row])):
                    if matrix[row][col] == pair[0]:
                        pos_1[0] = row
                        pos_1[1] = col
            if pair[1] in matrix[row]:
                for col in range(len(matrix[row])):
                    if matrix[row][col] == pair[1]:
                        pos_2[0] = row
                        pos_2[1] = col
        # encrypt
        if pos_1[0] == pos_2[0]:
            cipher += matrix[pos_1[0]][(pos_1[1] + 1) % 5]
            cipher += matrix[pos_2[0]][(pos_2[1] + 1) % 5]
        elif pos_1[1] == pos_2[1]:
            cipher += matrix[(pos_1[0] + 1) % 5][pos_1[1]]
            cipher += matrix[(pos_2[0] + 1) % 5][pos_2[1]]
        else:
            cipher += matrix[pos_1[0]][pos_2[1]]
            cipher += matrix[pos_2[0]][pos_1[1]]
    return cipher
#N : the articles on which Crfpp trains
N = (0, 10)
#M : the articles on which NaiveBayes trains
M = (100, 200)
#K : the articles on which NaiveBayes tests, and then add it with N
K = (10, 100)
#R : the articles that CRFPP tests on
R = (200, dataset.article_num)
print "N = %d, M = %d, K = %d, R = %d" % (N[1]-N[0], M[1]-M[0], K[1]-K[0], R[1]-R[0])

#NaiveBayes trains on M and tests on K
dataset.extract_training_knowledge_base_features(kb_train_path, article_range = M)
strip(kb_train_path)
dataset.extract_testing_knowledge_base_features(kb_test_path, article_range = K)
strip(kb_test_path)
model1.fit(kb_train_path)
model1.predict(kb_test_path, kb_pred_path, last_feature = False)
print "model1 valid score:", model1.score(kb_test_path, kb_pred_path)

# CRF trains on N and tests on R
dataset.extract_crf_features(crf_train_path, article_range = N)
strip(crf_train_path)
dataset.extract_crf_features(crf_test_path, article_range = R)
strip(crf_test_path)
model2.fit(crf_train_path)
model2.predict(crf_test_path, crf_pred_path, last_feature = False)
print "model2 without adding data", model2.score(crf_test_path, crf_pred_path)
async def removeError(m, edit):
    userid = ul.parse_mention(m)
    if userid == None:
        if edit:
            await m.channel.send("I wasn't able to understand that message: `$remove USER [num] new_message`")
        else:
            await m.channel.send("I wasn't able to understand that message: `$remove USER [num]`")
        return

    username = ul.fetch_username(m.guild, userid)
    if username == None:
        username = str(userid)

    # If editing, and no message specified, abort.
    mes = utils.parseMessage(m.content, username)
    if mes == "":
        if edit:
            await m.channel.send("You need to specify an edit message")
            return
        else:
            mes = "0"

    try:
        index = int(mes.split()[0]) - 1
        mes = utils.strip(mes)
    except (IndexError, ValueError):
        index = -1

    # Find most recent entry in database for specified user
    search_results = db.search(userid)
    # If no results in database found, can't modify
    if search_results == []:
        await m.channel.send("I couldn't find that user in the database")
    # If invalid index given, yell
    elif (index > len(search_results) - 1) or index < -1:
        await m.channel.send("I can't modify item number {}, there aren't that many for this user".format(index + 1))
    else:
        item = search_results[index]
        import visualize
        if edit:
            if item.log_type == LogTypes.NOTE.value:
                currentTime = datetime.datetime.utcnow()
                item.timestamp = currentTime
                item.log_message = mes
                item.staff = m.author.name
                db.add_log(item)
                out = "The log now reads as follows:\n{}\n".format(str(item))
                await m.channel.send(out)
                return
            else:
                await m.channel.send("You can only edit notes for now")
                return

        # Everything after here is deletion
        db.remove_log(item.dbid)
        out = "The following log was deleted:\n"
        out += str(item)

        if item.log_type == LogTypes.BAN:
            visualize.updateCache(item.staff, (-1, 0), utils.formatTime(item.timestamp))
        elif item.log_type == LogTypes.WARN:
            visualize.updateCache(item.staff, (0, -1), utils.formatTime(item.timestamp))

        await m.channel.send(out)

        # Search logging channel for matching post, and remove it
        try:
            if item.message_url != 0:
                chan = discord.utils.get(m.guild.channels, id=config.LOG_CHAN)
                m = await chan.fetch_message(item.message_url)
                await m.delete()
        # Print message if unable to find message to delete, but don't stop
        except discord.errors.HTTPException as e:
            print("Unable to find message to delete: {}".format(str(e)))
def cheapest(flights):
    # Returns a list of the cheapest flights from a list of flights.
    # Prices are compared numerically; comparing the raw strings would
    # order them lexicographically.
    min_price = min(float(strip(trip['price'])) for trip in flights)
    cheapest_trips = [x for x in flights if float(strip(x['price'])) == min_price]
    return cheapest_trips
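# Hypothetical usage sketch (assumes prices arrive as numeric strings and
# that strip() trims the surrounding whitespace):
trips = [{'price': ' 159.00 ', 'route': 'BOS-SFO'},
         {'price': ' 99.00 ', 'route': 'BOS-ORD'}]
print(cheapest(trips))  # [{'price': ' 99.00 ', 'route': 'BOS-ORD'}]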
def main(): parser = argparse.ArgumentParser(description='GitHub contributors stats') parser.add_argument('-c', dest='config', default=None, help='JSON config file') parser.add_argument('files', nargs=argparse.ZERO_OR_MORE, default=[], help='files to process') args = parser.parse_args() config_file = args.config # load config with access tokens auths = [] with open(config_file, 'r') as fh: config = json.load(fh, object_pairs_hook=collections.OrderedDict) res_tmp = config['res'] random.shuffle(res_tmp) for user in res_tmp: auths.append(HTTPBasicAuth(user['usr'], user['token'])) print('login;contribs;repos;avg') def newrec(): def_info = collections.OrderedDict() def_info['contribs'] = 0 def_info['repos'] = 0 def_info['cons'] = [] def_info['repolist'] = [] return def_info contribs = collections.defaultdict(newrec) already_loaded = set() not_found = [] for fl in args.files: data = open(fl).read().split('\n') for repo in data: repo_parts = utils.strip_leading_slash(repo).split('/') author = utils.strip(repo_parts[-2]) repo_name = utils.strip(repo_parts[-1]) url = 'https://api.github.com/repos/%s/%s' % (author, repo_name) contrib_url = 'https://api.github.com/repos/%s/%s/contributors' % ( author, repo_name) if url in already_loaded: continue already_loaded.add(url) res = requests.get(contrib_url, timeout=10, auth=random.choice(auths)) js = res.json() if js is None: not_found.append(url) continue for contrib in js: login = contrib['login'] usr = contribs[login] usr['contribs'] += contrib['contributions'] usr['repos'] += 1 usr['cons'].append(contrib['contributions']) usr['repolist'].append('%s/%s' % (author, repo_name)) for login in contribs: rec = contribs[login] rdat = [ login, rec['contribs'], rec['repos'], float(rec['contribs']) / float(rec['repos']), '|'.join(rec['repolist']) ] print(';'.join([str(x) for x in rdat]))
def encrypt(text, alpha_key, num_key, add_key): ''' Function -- encrypt encrypts plain text by filling a 3x10 key using the first two keys. Letters in the plain text are then assigned numbers via the key square. The digits of this set of numbers is added sequentially to the add_key to yield the final cipher number. paramters: text -- plain text string alpha_key -- full alphabet in any order (str) num_key -- 2 digit integer (string or int) add_key -- int of any length returns encrypted cipher text version of the plain text (int) ''' # check validity of the inputs if add_key == '' or num_key == '' or alpha_key == '' or text == '': raise ValueError('both text and key values must be given') if not isinstance(text, str): raise TypeError('text must be a string') if not isinstance(alpha_key, str): raise TypeError('all keys must be strings') ''' if not isinstance(num_key, str): raise TypeError('all keys must be strings') if not isinstance(add_key, str): raise TypeError('all keys must be strings') ''' num_key = str(num_key) if (len(num_key) != 2 or not num_key[0].isnumeric() or not num_key[1].isnumeric() or num_key[0] == num_key[1]): raise ValueError('num_key must be have 2 unique integer digits') if int(num_key[0]) == 0 or int(num_key[1]) == 0: raise ValueError('digits of num_key must be greater than 0') if not utils.check_full(alpha_key): msg = ('alpha_key must contain only each letter of alphabet once') raise ValueError(msg) alpha_key = alpha_key.upper() try: int(num_key) num_key = str(num_key) add_key = int(add_key) if int(num_key) < 0 or int(add_key) < 0: print('Warning: negative keys will be assumed to be positive') except ValueError: raise ValueError('num_key and add_key must be ints') try: text = utils.strip(text) text = utils.process_text(text) text = utils.latin_caps(text) except ValueError: raise ValueError('text must only contain valid letters') # construct cipher square key_square = {} # first row key_square[0] = [alpha_key[i] for i in range(8)] if int(num_key[0]) > int(num_key[1]): key_square[0].insert(int(num_key[1]), '') key_square[0].insert(int(num_key[0]), '') else: key_square[0].insert(int(num_key[0]), '') key_square[0].insert(int(num_key[1]), '') # next rows key_square[int(num_key[0])] = [alpha_key[i] for i in range(8, 18)] key_square[int(num_key[1])] = [alpha_key[i] for i in range(18, 26)] key_square[int(num_key[1])].append('0') key_square[int(num_key[1])].append('1') # generate numeric text num_text = '' for letter in text: if letter in key_square[0]: num_text = num_text + str(key_square[0].index(letter)) elif letter in key_square[int(num_key[0])]: combo = num_key[0] + str(key_square[int(num_key[0])].index(letter)) num_text = num_text + combo elif letter in key_square[int(num_key[1])]: combo = num_key[1] + str(key_square[int(num_key[1])].index(letter)) num_text = num_text + combo else: raise ValueError('letters in plain text must be latin') # add the adder text adder = '' for i in range(len(num_text)): added = int(num_text[i]) + int(str(add_key)[i % len(str(add_key))]) adder = adder + str(added)[-1] # make cipher text via the grid cipher = '' count = 0 while count < len(adder): num = int(adder[count]) if key_square[0][num] == '': if (count + 1) >= len(adder): count += 2 else: next_num = int(adder[count + 1]) cross = key_square[num][next_num] cipher = cipher + cross count += 2 else: cipher += key_square[0][num] count += 1 return cipher
def main(): parser = argparse.ArgumentParser(description='GitHub repo stats') parser.add_argument('-c', dest='config', default=None, help='JSON config file') parser.add_argument('files', nargs=argparse.ZERO_OR_MORE, default=[], help='files to process') args = parser.parse_args() config_file = args.config # load config with access tokens auths = [] with open(config_file, 'r') as fh: config = json.load(fh, object_pairs_hook=collections.OrderedDict) res_tmp = config['res'] random.shuffle(res_tmp) for user in res_tmp: auths.append(HTTPBasicAuth(user['usr'], user['token'])) print( 'repo;stars;watchers;forks;open_issues;subscribers_count;network_count;size;language;' 'created_at;updated_at;pushed_at;owner;commits;branches;releases;contributors;closed_issues;' 'first_commit;last_commit;pull_open;pull_closed') already_loaded = set() not_found = [] for fl in args.files: data = open(fl).read().split('\n') for repo in data: repo_parts = utils.strip_leading_slash(repo).split('/') author = utils.strip(repo_parts[-2]) repo_name = utils.strip(repo_parts[-1]) url = 'https://api.github.com/repos/%s/%s' % (author, repo_name) main_url = 'https://github.com/%s/%s' % (author, repo_name) issues_url = 'https://github.com/%s/%s/issues' % (author, repo_name) pulls_url = 'https://github.com/%s/%s/pulls' % (author, repo_name) commits_url = 'https://api.github.com/repos/%s/%s/commits' % ( author, repo_name) if url in already_loaded: continue already_loaded.add(url) res = requests.get(url, timeout=10, auth=random.choice(auths)) js = res.json() if js is None or 'stargazers_count' not in js: not_found.append(url) continue # load num of commits res = requests.get(main_url, timeout=10) tree = html.fromstring(res.text) lis = tree.xpath('//ul[@class="numbers-summary"]//li') lidata = [-1] * 4 for lidx, li in enumerate(lis): if lidx > 3: break try: lidata[lidx] = int(li.xpath('a/span')[0].text.strip()) except Exception as e: logger.debug('Excepton in parsing: %s' % e) #html.tostring(li)) # contributors load if lidata[3] == -1: url_contrib = js['contributors_url'] res = requests.get(url_contrib, timeout=10, auth=random.choice(auths)) jsc = res.json() if jsc is not None: lidata[3] = len(jsc) # closed issues res = requests.get(issues_url, timeout=10) tree = html.fromstring(res.text) js['closed'] = label_num(tree, 1) # commits, first & last res = requests.get(commits_url, timeout=10, auth=random.choice(auths)) cjs = res.json() last_commit = commit_time(cjs[0]) if 'Link' in res.headers: last_commit_page_url = get_last_link(res.headers['Link']) res = requests.get(last_commit_page_url, timeout=10, auth=random.choice(auths)) cjs = res.json() first_commit = commit_time(cjs[-1]) # pull requests res = requests.get(pulls_url, timeout=10) tree = html.fromstring(res.text) js['pull_open'] = label_num(tree, 0) js['pull_closed'] = label_num(tree, 1) rdat = [ '%s/%s' % (author, repo_name), js['stargazers_count'], js['watchers_count'], js['forks'], js['open_issues'], js['subscribers_count'], js['network_count'], js['size'], js['language'], timefix(js['created_at']), timefix(js['updated_at']), timefix(js['pushed_at']), js['owner']['login'], lidata[0], lidata[1], lidata[2], lidata[3], js['closed'], first_commit, last_commit, js['pull_open'], js['pull_closed'] ] print(';'.join([str(x) for x in rdat])) print('Repos not found:') for x in not_found: print('.. %s' % x)
def decrypt(text, alpha_key, num_key, add_key): ''' Function -- decrypt decrypts cipher text by reversing the cipher algorithm starting with adder text that is converted to numeric text and finally plain text using a key grid. paramters: text -- cipher text string of int literals alpha_key -- full alphabet in any order (str) num_key -- 2 digit integer (string or int) add_key -- int of any length returns decrypted plain text version of the cipher text ''' # check validity of the inputs if add_key == '' or num_key == '' or alpha_key == '' or text == '': raise ValueError('both text and key values must be given') if not isinstance(text, str): raise TypeError('text must be a string') if not isinstance(alpha_key, str): raise TypeError('all keys must be strings') ''' if not isinstance(num_key, str): raise TypeError('all keys must be strings') if not isinstance(add_key, str): raise TypeError('all keys must be strings') ''' num_key = str(num_key) if (len(str(num_key)) != 2 or not num_key[0].isnumeric() or not num_key[1].isnumeric()): raise ValueError('num_key must be have 2 unique integer digits') if int(str(num_key)[0]) == 0 or int(str(num_key)[1]) == 0: raise ValueError('digits of num_key must be greater than 0') if not utils.check_full(alpha_key): msg = ('alpha_key must contain only each letter of alphabet once') raise ValueError(msg) alpha_key = alpha_key.upper() try: int(num_key) num_key = str(num_key) add_key = int(add_key) if int(num_key) < 0 or int(add_key) < 0: print('Warning: negative keys will be assumed to be positive') except ValueError: raise ValueError('num_key and add_key must be ints') try: text = utils.strip(text) text = text.upper() for x in text: if (ord(x) >= 65 and ord(x) <= 90) or x == '1' or x == '0': pass else: raise ValueError('text must only contain valid letters') except ValueError: raise ValueError('text must only contain valid letters') # construct cipher square key_square = {} # first row key_square[0] = [alpha_key[i] for i in range(8)] if int(num_key[0]) > int(num_key[1]): key_square[0].insert(int(num_key[1]), '') key_square[0].insert(int(num_key[0]), '') else: key_square[0].insert(int(num_key[0]), '') key_square[0].insert(int(num_key[1]), '') # next rows key_square[int(num_key[0])] = [alpha_key[i] for i in range(8, 18)] key_square[int(num_key[1])] = [alpha_key[i] for i in range(18, 26)] key_square[int(num_key[1])].append('0') key_square[int(num_key[1])].append('1') # generate adder text adder = '' for letter in text: if letter in key_square[0]: adder = adder + str(key_square[0].index(letter)) elif letter in key_square[int(num_key[0])]: combo = num_key[0] + str(key_square[int(num_key[0])].index(letter)) adder = adder + combo elif letter in key_square[int(num_key[1])]: combo = num_key[1] + str(key_square[int(num_key[1])].index(letter)) adder = adder + combo else: raise ValueError('letters in plain text must be latin') # generate numeric text num_text = '' for i in range(len(adder)): subbed = int(adder[i]) - int(str(add_key)[i % len(str(add_key))]) if subbed < 0: new_sub = list(range(10))[subbed] num_text = num_text + str(new_sub) else: num_text = num_text + str(subbed)[-1] # generate original plain text plain = '' count = 0 while count < len(num_text): num = int(num_text[count]) if key_square[0][num] == '': if (count + 1) >= len(num_text): count += 2 else: next_num = int(num_text[count + 1]) cross = key_square[num][next_num] plain = plain + cross count += 2 else: plain += key_square[0][num] count += 1 return plain
def test_strip(self):
    self.assertEqual('', utils.strip(' '))
    self.assertEqual(u'', utils.strip(u' '))
    self.assertEqual('x', utils.strip(' x '))
    self.assertEqual(u'x', utils.strip(u' x '))
    self.assertRaises(Exception, utils.strip, None)
async def on_message(message: Message): global paywalled_sites # include list of paywalled site inside this function global last_check_in #rate limiter if message: try: #TODO: Put some random fuzz on the checkin timedelta #TODO: Lower the checkin time delta based on the subsequent frequency if not last_check_in or last_check_in < (message.created_at - timedelta(seconds=60)): #grab the non-bot members memb_ls = [ m async for m in message.guild.fetch_members(limit=None) if not m.bot ] #grab the last ten minutes of messages, up to 600 messages last_check_in = message.created_at ten_min_ago = message.created_at - timedelta(seconds=600) messages = await message.channel.history( limit=600, after=ten_min_ago).flatten() #get the history of message authors who aren't bots human_authors_history = [ m.author for m in messages if m.author in memb_ls ] #get the unique authors human_author_set = set(human_authors_history) #if two users are talking prefix = None if len(human_author_set) == 2: prefix = f"{list(human_author_set)[0].mention} and {list(human_author_set)[1].mention} are " #if one user is talking to themself elif len(human_author_set) == 1: prefix = f"{list(human_author_set)[0].mention} is " if prefix: if len(messages) > 100: await message.channel.send(prefix + "going at it. Wow!") if len(messages) > 200: await message.channel.send( prefix + "getting into some serious behavior.") if len(messages) > 300: await message.channel.send(prefix + "setting a record!") if len(messages) > 400: await message.channel.send(prefix + "very serious about this!") if len(messages) > 500: await message.channel.send(prefix + ", shut up. Please.") except: pass if message.content.startswith('!paywall'): # Manually link to archive.is # Format: `!paywall URL` will link to archive.is/URL words = message.content.split(" ") await message.channel.send(f"https://www.archive.is/{words[1]}") if and_includes(message.content, 'thank', 'soros'): # Responds to Sal when he says 'Thanks Soros' # (note: not antisemitic joke, used to mock the antisemitic globalist Soros stories) await message.channel.send( 'No problemo buckaroo, anything for a fellow reptile.') if and_includes(message.content, 'who', 'horrible'): # You know what this does await message.channel.send(f"Why, {message.author.mention} of course!") if or_includes(message.content, 'socialis', 'communis'): # You know what this does await message.channel.send(f"AJ is the real commie here!") if or_includes(message.content, 'shane', 'metricity', 'the best') and (message.author != client.user): await message.channel.send(f"Shane really is the best.") if or_includes(message.content, "suck", "sux") and (message.author != client.user): # ya know what this does too await message.channel.send("You know what else sucks? Salex Bexman.") if url_validator(message.content): # Checks if message is a valid URL and a paywalled domain. If it is, returns the archive.is link. raw_url = message.content url = tldextract.extract(message.content) if url.domain in paywalled_sites: await message.channel.send(f"https://www.archive.is/{raw_url}") if message.content.startswith('!add'): # Add new domains to list of paywalled domains # Format: `!add DOMAIN_1 DOMAIN_2 ... DOMAIN_n` will add DOMAIN_1 thru DOMAIN_n to list # of paywalled sites and respond with a confirmation message. 
new_paywalls = message.content.split(" ")[1:] paywalled_sites += new_paywalls paywalled_sites = list(set(paywalled_sites)) paywalled_sites = [i for i in paywalled_sites if i != ""] with open('paywalled', 'w') as file: sites = "\n".join(paywalled_sites) file.write(sites) await message.channel.send('**Added the following domains:**' + "\n" + cprint(new_paywalls)) if message.content.startswith('!delete'): # Delete domains to list of paywalled domains # Format: `!add DOMAIN_1 DOMAIN_2 ... DOMAIN_n` will add DOMAIN_1 thru DOMAIN_n to list # of paywalled sites and respond with a confirmation message. new_paywalls = message.content.split(" ")[1:] paywalled_sites = [i for i in paywalled_sites if i not in new_paywalls] with open('paywalled', 'w') as file: sites = "\n".join(paywalled_sites) file.write(sites) await message.channel.send('**Deleted the following domains:**' + "\n" + cprint(new_paywalls)) if message.content.startswith("!list paywalls"): # Displays list of all sites on the current paywall list await message.channel.send("**Paywalled sites:**" + "\n" + cprint(sorted(paywalled_sites))) if message.content.startswith("!test"): await message.channel.send( "Stop spamming the f*****g chat with your damn tests u chode.") if message.content.startswith("!gif"): async with message.channel.typing( ): #makes the channel say the bot is typing scope = 1 melee = False num_gifs = 1 parsed = message.content.split(" ") if parsed[1] == 'melee': melee = True stripped = [strip(word) for word in parsed[2:]] else: stripped = [strip(word) for word in parsed[1:]] search = "+".join(stripped) try: scope_str = parsed[0][4:] scope = int(scope_str) if melee: num_gifs = scope except: pass choice = random.randint(1, scope) response = requests.get( f"https://api.giphy.com/v1/gifs/search?q={search}&api_key=WiLstLIo2SInusTmGDDkhhY0tU6xKNEl&limit={num_gifs}&offset={choice}" ) if response.status_code != 200: await message.channel.send("U stupid bruh, bad request.") else: gifs = response.json()['data'] gif_urls = [gif['url'] for gif in gifs] for url in gif_urls: await message.channel.send(url) if message.content.startswith("!calc"): async with message.channel.typing( ): #makes the channel say the bot is typing terms = " ".join(message.content.split(" ")[1:]) await message.channel.send(eval(terms))
""" print "N = %d, M = %d, K = %d, R = %d" % (N[1]-N[0], M[1]- M[0], K[1]-K[0] ,R[1]-R[0]) #NaiveBayes trains on M and tests on K dataset.extract_training_knowledge_base_features(kb_train_path, article_range = M) strip(kb_train_path) dataset.extract_testing_knowledge_base_features(kb_test_path, article_range = K) strip(kb_test_path) model1.fit(kb_train_path) model1.predict(kb_test_path, kb_pred_path, last_feature = False) print "model1 valid score:", model1.score(kb_test_path, kb_pred_path) """ # CRF trains on N and tests on R dataset.extract_crf_features(crf_train_path, article_range = N) strip(crf_train_path) dataset.extract_crf_features(crf_test_path, article_range = R) strip(crf_test_path) model2.fit(crf_train_path) model2.predict(crf_test_path, crf_pred_path, last_feature = False) print "model2 without adding data",model2.score(crf_test_path, crf_pred_path) """ # CRF trains on (N + predicted K) and tests on R dataset.extract_crf_features(crf_added_train_path, article_range = K) retag(crf_added_train_path, kb_pred_path, last_feature = False) os.system("cat %s %s > %s" % (crf_train_path, crf_added_train_path, crf_mixed_train_path)) model2.fit(crf_mixed_train_path) model2.predict(crf_test_path, crf_pred_path, last_feature = False) print "model2 after adding data",model2.score(crf_test_path, crf_pred_path)
def encrypt(text, key, seed):
    '''
    Function -- encrypt
        encrypts plain text by providing a number indicating word position
        in key for each letter in the text
    parameters:
        text -- plain text string
        key -- name of text file
        seed -- integer seed for choosing random word from key
    returns encrypted cipher text version of the plain text
    '''
    # check validity of the inputs
    if key == '' or text == '':
        raise ValueError('both text and key values must be given')
    if not isinstance(key, str):
        raise TypeError('key must be a string')
    if not isinstance(text, str):
        raise TypeError('text must be a string')
    try:
        seed = int(seed)
    except ValueError:
        raise ValueError('seed must be an integer')
    if isinstance(key, str) and key[-4:] == '.txt':
        pass
    else:
        raise ValueError('key is an invalid filename')
    try:
        text = utils.strip(text)
        text = utils.process_text(text)
        text = utils.latin_caps(text)
    except ValueError:
        raise ValueError('text must only contain valid letters')
    if not os.path.exists(key):
        raise ValueError('key is an invalid filename')
    key_dict = {}
    # read data
    file = open(key, 'r')
    file_data = file.read().upper()
    # process key
    file_data = file_data.split()
    keywords = []
    for word in file_data:
        stripped = word.strip(string.punctuation + ' ')
        if stripped != '':
            keywords.append(stripped)
    # second loop to go over cleaned up words
    for word in keywords:
        if word[0] in string.ascii_uppercase:
            if word[0].upper() in key_dict.keys():
                key_dict[word[0]].append(keywords.index(word) + 1)
            elif word[0] in string.ascii_uppercase:
                key_dict[word[0]] = [keywords.index(word) + 1]
    file.close()
    # seed
    random.seed(int(seed))
    # cipher output
    cipher = ''
    # iterate through text and generate a cipher text
    for letter in text:
        if letter in key_dict.keys():
            picker = random.randint(0, (len(key_dict[letter]) - 1))
            cipher = cipher + str(int(key_dict[letter][picker])) + ' '
        else:
            msg = 'letters in text must match first letter >= 1 word in key'
            raise ValueError(msg)
    cipher = cipher.strip(' ')
    return cipher