import string
from collections import Counter, OrderedDict

import fitz  # PyMuPDF
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize


def sube(nombrearchivo):
    # Extract the text of every page into a lowercase .txt file.
    doc = fitz.open(nombrearchivo)
    salida = open(nombrearchivo + ".txt", "wb")
    for pagina in doc:
        texto = pagina.get_text().encode("utf8")  # getText() in older PyMuPDF
        salida.write(texto.lower())
        salida.write(b"\n-----\n")
    salida.close()

    with open(nombrearchivo + '.txt', 'r', encoding='UTF8') as archivo:
        texto = archivo.read()

    # Tokenize, then drop punctuation and English/Spanish stop words.
    stop_words = set(stopwords.words(fileids=('english', 'spanish')))
    word_tokens = word_tokenize(texto)
    word_tokens = list(
        filter(lambda token: token not in string.punctuation, word_tokens))
    filtro = []
    for palabra in word_tokens:
        if palabra not in stop_words:
            filtro.append(palabra)

    # Count the remaining words and write them out sorted by frequency.
    c = Counter(filtro)
    y = OrderedDict(c.most_common())
    with open(nombrearchivo + 'KEYWORDS.txt', 'w', encoding='UTF8') as far:
        for k, v in y.items():
            far.write(f"{k} {v}\n")
import csv
import json
from collections import OrderedDict

from nltk.stem import PorterStemmer

# cfg, RNN_CONFIG, COLUMN_NAME_TO_IDX and printProgressBar come from the
# project's config/utility modules.


def create_vocabulary():
    """
    Using RNN_CONFIG['vocab_using_n_tweets'] tweets from the train.csv dataset,
    creates a vocabulary with RNN_CONFIG['AE_vocab_size'] words.
    The vocabulary is an ordered dictionary: the keys are the word stems and
    the values each word's index.
    :return: None, dumps the vocabulary as a .json file at data/vocab.json
    """
    with open(cfg['csv_relative_path'], newline='') as csvfile:
        data = list(csv.reader(csvfile))[1:]
    vocab = {}
    ps = PorterStemmer()
    for idx, line in enumerate(data[:RNN_CONFIG['vocab_using_n_tweets']]):
        printProgressBar(idx, RNN_CONFIG['vocab_using_n_tweets'],
                         'creating dictionary')
        for word in line[COLUMN_NAME_TO_IDX['text']].lower().split(' '):
            w = ps.stem(word)
            if w in vocab:
                vocab[w] += 1
            else:
                vocab[w] = 1
    # Keep the AE_vocab_size most frequent stems and map each one to its rank.
    vocab = OrderedDict([(k, idx) for idx, (k, _) in enumerate(
        sorted(vocab.items(), key=lambda item: item[1], reverse=True)
        [:RNN_CONFIG['AE_vocab_size']])])
    with open('data/vocab.json', 'w') as f:
        json.dump(vocab, f, indent=4)
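# A minimal sketch of consuming the vocabulary written above; the stem/index
# pairs in the comment are hypothetical, since the real content depends on
# train.csv and the config.
import json
from nltk.stem import PorterStemmer

with open('data/vocab.json') as f:
    vocab = json.load(f)  # e.g. {"the": 0, "to": 1, "a": 2, ...}

ps = PorterStemmer()
idx = vocab.get(ps.stem("running"))  # index of the stem "run", or None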
from collections import OrderedDict


def parse_argv(args: list, namemap: dict = None) -> dict:
    """A simple helper for getting the arguments of a __main__ function."""
    d = OrderedDict()
    if not args:
        return d
    i = 1  # skip args[0], the program name
    while i < len(args):
        s = args[i]
        if s.startswith('-'):
            if namemap and s in namemap:
                s = namemap[s]
            if s not in d:
                d[s] = []
            # Collect every following value up to the next '-' flag.
            while i + 1 < len(args) and args[i + 1][0] != '-':
                d[s].append(args[i + 1])
                i += 1
        i += 1
    # Collapse empty lists to None and single values to the value itself.
    for k, v in tuple(d.items()):
        if not v:
            d[k] = None
        elif len(v) == 1:
            d[k] = v[0]
    return d
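# Example: flags may take zero, one, or several values; namemap lets aliases
# collapse onto a canonical flag name.
print(parse_argv(['prog', '-a', '1', '2', '-b', '--name', 'x'],
                 namemap={'--name': '-n'}))
# OrderedDict([('-a', ['1', '2']), ('-b', None), ('-n', 'x')])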
from collections import OrderedDict

from django.shortcuts import render

# Project is this app's model.


def importSeqFiles(request, pk):
    template_name = 'importFiles.html'
    excel_file = request.FILES['excel_file']
    excel_file_content = excel_file.read().decode("utf-8")
    lines = excel_file_content.split("\n")
    runDict = OrderedDict()
    count = 1
    for line in lines:
        line = line.rstrip("\r")
        v = line.split(",")
        if len(v) < 3:  # skip blank or malformed lines
            continue
        if v[1] != "":
            runName = v[1]
        if v[0] != "":
            expName = v[0]
        runDict[v[2]] = [expName, runName]
        count += 1
    # Drop the header row and empty keys.
    if "File Path" in runDict:
        del runDict["File Path"]
    if " " in runDict:
        del runDict[" "]
    context = {}
    runDictSorted = sorted(runDict.items())
    context['runDict'] = runDictSorted
    project = Project.objects.get(pk=pk)
    context['project'] = project
    # The original returned a bare tuple; a Django view must return a response.
    return render(request, template_name, context)
from collections import OrderedDict


def test_largescale():
    s = Stopwatch()
    integration_factor = 5
    device_map = device_parser.build_device_map(
        device_parser.parse_data('test.xml'))
    test_size = 10000
    histogram = OrderedDict()
    # Five doubling test sizes, five runs each.
    for _ in range(5):
        time = 0.0
        for _ in range(5):
            s.start()
            generate_test_input(device_map, test_size,
                                file_name='test_input1.csv')
            s.stop()
            print('Generating test input of size {}: '.format(test_size),
                  s.read())
            s.reset()
            s.start()
            analyze_data_nograph('csvs/test_input1.csv', integration_factor,
                                 device_map)
            s.stop()
            print('Processing input of size {}: '.format(test_size), s.read())
            time += s.read()
            s.reset()
        print('Average time for input of size {}: '.format(test_size),
              time / 5)
        histogram[test_size] = time / 5
        test_size *= 2
    print(histogram)
    # Print the header once, then one row per test size.
    print(' size | time ')
    for i, j in histogram.items():
        print('{0:5d}|{1:5f}'.format(i, j))
from collections import OrderedDict

from Bio import SeqIO


def createFasta(input_file, append_file, order):
    fastaInDict = dict()
    seq = OrderedDict()
    with open(input_file) as FASTAIN, open(append_file, "a") as APP, \
            open(order) as ORD:
        # Load every FASTA record into a dict keyed by its id.
        fastaParse = SeqIO.parse(FASTAIN, "fasta")
        for fastaSeq in fastaParse:
            s = str(fastaSeq.seq)
            idFasta = fastaSeq.id
            fastaInDict[idFasta] = s
        # Each ORD line looks like "<id>__<start>__<end>\t<chromosome>".
        for line in ORD:
            line = line.rstrip("\n")
            v = line.split("\t")
            val = v[0].split("__")
            start = int(val[1]) - 1
            end = int(val[2])
            k = val[0]
            chrom = v[1]
            if chrom not in seq:
                seq[chrom] = []
            seq[chrom].append(fastaInDict[k][start:end])
        # Write the stitched sequence of every chromosome.
        for k, v in seq.items():
            print("Writing Chromosome " + str(k))
            APP.write(">" + k + "\n")
            APP.write("".join(v))
            APP.write("\n")
from collections import OrderedDict


# LeetCode-style solution; the surrounding Solution class is implied by `self`.
class Solution:
    def firstUniqChar(self, s: str) -> int:
        # Count characters in first-seen order, then return the index of the
        # first character that occurs exactly once.
        order_dict = OrderedDict()
        for char in s:
            order_dict[char] = order_dict.get(char, 0) + 1
        for key, val in order_dict.items():
            if val == 1:
                return s.index(key)
        return -1
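# Example usage of the solution above:
sol = Solution()
print(sol.firstUniqChar("leetcode"))      # 0  ('l' never repeats)
print(sol.firstUniqChar("loveleetcode"))  # 2  ('v' is the first unique char)
print(sol.firstUniqChar("aabb"))          # -1 (no unique character)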
import random
from collections import OrderedDict


class BasicNet:
    """A minimal sequential network: layers are applied in insertion order."""

    def __init__(self, *args, **kwargs):
        """Store the given layers, keyed by their names."""
        self.results = OrderedDict()
        self.y = None
        self.x = None
        self.layers = OrderedDict()
        for layer in args:
            layer_name = layer.name
            self.layers[layer_name] = layer
        self.name = kwargs.get('name', str(random.randint(0, 2**5)))

    def forward(self, x, *args, **kwargs):
        """Run x through every layer, recording each intermediate output."""
        self.x = x
        y = x
        self.results['input'] = {'x': y}
        for layer_name, layer in self.layers.items():
            y = layer.forward(y, *args, **kwargs)
            self.results[layer_name] = {'x': y}
        self.y = y
        return self.y

    def __str__(self):
        printable = f'Net {self.name}:'
        template = '\n\t{}'
        for layer_name, layer in self.layers.items():
            printable += template.format(layer.__str__())
        return printable
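# A minimal sketch of driving BasicNet; ScaleLayer is a hypothetical stand-in
# for whatever layer classes the real project defines (anything with .name,
# .forward() and __str__() works).
class ScaleLayer:
    def __init__(self, name, factor):
        self.name = name
        self.factor = factor

    def forward(self, x, *args, **kwargs):
        return x * self.factor

    def __str__(self):
        return f'ScaleLayer({self.name}, x{self.factor})'


net = BasicNet(ScaleLayer('double', 2), ScaleLayer('triple', 3), name='demo')
print(net.forward(1))   # 6: input -> x2 -> x3
print(net)              # lists both layers under 'Net demo:'
print(net.results)      # intermediate outputs per layer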
from collections import OrderedDict


def stringPermutation(orgString):
    # Count each character, then order the map by character.
    char_count_map = {}
    for temp_char in orgString:
        if temp_char not in char_count_map:
            char_count_map[temp_char] = 1
            continue
        char_count_map[temp_char] += 1
    char_count_map = OrderedDict(
        sorted(char_count_map.items(), key=lambda t: t[0]))
    temp_arr = []
    string_permutaion_list = []
    Util.string_permutation_helper(len(orgString), char_count_map, temp_arr,
                                   string_permutaion_list)
    # print(len(string_permutaion_list))
    print(string_permutaion_list)
def unpack(self, action: OrderedDict) -> OrderedDict:
    '''
    Utility function.

    @action: a packed action; must be an OrderedDict whose keys are the
        packed-entity labels.
    @return: an unpacked version of the packed action
    '''
    newactions = {}
    for actid, value in action.items():
        if actid in self._subdicts:
            # A packed id expands into all of its original actions.
            origactions = self._subdicts[actid].getById(value)
            for origid, origact in origactions.items():
                newactions[origid] = origact
        else:
            newactions[actid] = value
    return OrderedDict(newactions)
def make_theano_tick(self):
    """Generate the theano function for running the network simulation.

    :returns: theano function
    """
    # dictionary for all variables
    # and the theano description of how to compute them
    updates = OrderedDict()

    # for every node in the network
    for node in self.nodes.values():
        # if there is some variable to update
        if hasattr(node, 'update'):
            # add it to the list of variables to update every time step
            updates.update(node.update(self.dt))

    # create graph and return optimized update function
    return theano.function([], [], updates=updates.items())
import operator
from collections import OrderedDict

import numpy as np
from tqdm import tqdm


def get_importance(df_train, df_dev, y_name, metric='acc', excluded_vars=None,
                   **kwargs):
    """
    Perform random permutation importance and return an ordered dict whose
    keys are the variable names and whose values are the metric.

    :param df_train: Training dataset.
    :param df_dev: Dev dataset.
    :param y_name: The name of the y variable.
    :param metric: String. Can be 'acc', 'micro_f1', 'macro_f1', 'precision',
        'recall'.
    :param excluded_vars: List. These variables are not permuted and do not
        enter the selection. An example of such a variable is the ID.
    :param kwargs: Arguments for logistic regression.
    :return: An ordered dict containing the metrics.
    """
    df_train_copy = df_train.copy(deep=True)
    df_dev_copy = df_dev.copy(deep=True)
    y_dev = df_dev_copy[y_name]
    excluded_vars = excluded_vars or []
    vars_to_permute = set(df_train.columns.values).difference(
        set(excluded_vars))
    # Baseline score on the unpermuted data.
    y_pred = get_classification_result(df_train_copy, df_dev, y_name,
                                       excluded_vars, **kwargs)
    metric_score = get_metric(y_pred, y_dev, metric)
    result = OrderedDict()
    result['original'] = metric_score
    for var in tqdm(vars_to_permute):
        df_train_permute = df_train_copy.copy(deep=True)
        df_train_permute[var] = np.random.permutation(df_train_permute[var])
        y_pred = get_classification_result(df_train_permute, df_dev, y_name,
                                           excluded_vars, **kwargs)
        metric_score = get_metric(y_pred, y_dev, metric)
        # Record the score under the permuted variable's name (the original
        # stored every score under the literal key 'var').
        result[var] = metric_score
    result = OrderedDict(sorted(result.items(), key=operator.itemgetter(1)))
    return result
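# A self-contained sketch of the same permutation-importance idea with plain
# scikit-learn on synthetic data (get_classification_result / get_metric are
# project helpers not shown here, so this stands alone):
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

rng = np.random.default_rng(0)
X = pd.DataFrame({'signal': rng.normal(size=200),
                  'noise': rng.normal(size=200)})
y = (X['signal'] > 0).astype(int)

clf = LogisticRegression().fit(X, y)
baseline = accuracy_score(y, clf.predict(X))
for col in X.columns:
    X_perm = X.copy()
    X_perm[col] = rng.permutation(X_perm[col].values)
    score = accuracy_score(y, clf.predict(X_perm))
    # A big drop from the baseline marks an important feature.
    print(col, round(baseline - score, 3))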
import re
from collections import OrderedDict


def readFastq(fastq):
    sequence, qual = OrderedDict(), OrderedDict()
    with uopen(fastq) as fin:
        line = fin.readline()
        if not line.startswith('@'):
            # Not FASTQ: fall back to FASTA and fabricate uniform qualities
            # ('I' for valid bases, '!' for anything else).
            sequence = readFasta(fastq)
            return sequence, OrderedDict(
                [n, re.sub(r'[^!]', 'I', re.sub(r'[^ACGTacgt]', '!', s))]
                for n, s in sequence.items())
    with uopen(fastq) as fin:
        for lineId, line in enumerate(fin):
            if lineId % 4 == 0:
                name = line[1:].strip().split()[0]
                sequence[name] = []
                qual[name] = []
            elif lineId % 4 == 1:
                sequence[name].extend(line.strip().split())
            elif lineId % 4 == 3:
                qual[name].extend(line.strip().split())
    for s in sequence:
        sequence[s] = (''.join(sequence[s])).upper()
        qual[s] = ''.join(qual[s])
    return sequence, qual
from collections import OrderedDict


def prepare_data(train, test, eval_sessions_neg_samples):
    sess_clicks = OrderedDict()
    sess_date_tr = OrderedDict()
    ctr = 0
    curid = -1
    curdate = None
    # Collect every training session's clicks and remember each session's
    # last timestamp.
    for tr in train.itertuples():
        sessid = tr.SessionId
        if curdate and not curid == sessid:
            sess_date_tr[curid] = tr.Time
            curid = sessid
        item = int(tr.ItemId)
        curdate = tr.Time
        if sessid in sess_clicks:
            sess_clicks[sessid] += [item]
        else:
            sess_clicks[sessid] = [item]
        ctr += 1
    sess_date_tr[curid] = tr.Time

    # Same pass over the test set.
    sess_date_ts = OrderedDict()
    ctr = 0
    curid = -1
    curdate = None
    for _, ts in test.iterrows():
        sessid = ts.SessionId
        if curdate and not curid == sessid:
            sess_date_ts[curid] = ts.Time
            curid = sessid
        item = ts.ItemId
        curdate = ts.Time
        if sessid in sess_clicks:
            sess_clicks[sessid] += [item]
        else:
            sess_clicks[sessid] = [item]
        ctr += 1
    sess_date_ts[curid] = ts.Time

    tra_sess = list(sess_date_tr.items())
    tes_sess = list(sess_date_ts.items())
    # Sorting sessions by date is not needed here.
    # Choosing item count >= 5 gives approximately the same number of items
    # as reported in the paper.
    tra_ids, tra_dates, tra_seqs, item_dict, reversed_item_dict = obtian_tra(
        tra_sess, sess_clicks)
    # tes_ids, tes_dates, tes_seqs = obtian_tes(tes_sess, sess_clicks, item_dict)
    tes_ids, tes_dates, tes_seqs, tes_neg_samples_ids, \
        count_clicks_in_test_items_not_in_train_set = obtian_tes_with_neg_samples(
            tes_sess, sess_clicks, item_dict, eval_sessions_neg_samples)
    tr_seqs, tr_dates, tr_labs, tr_ids = process_seqs(tra_seqs, tra_dates)
    # te_seqs, te_dates, te_labs, te_ids = process_seqs(tes_seqs, tes_dates)
    te_seqs, te_dates, te_labs, te_ids, te_neg_samples = process_seqs_with_neg_samples(
        tes_seqs, tes_dates, tes_neg_samples_ids)
    tra = (tr_seqs, tr_labs)
    tes = (te_seqs, te_labs, te_neg_samples)
    return tra, tes, item_dict, reversed_item_dict, \
        count_clicks_in_test_items_not_in_train_set
from collections import OrderedDict

favorite_languages = OrderedDict()
favorite_languages['jen'] = 'python'
favorite_languages['sarah'] = 'c'
favorite_languages['edward'] = 'ruby'
favorite_languages['phil'] = 'python'

for name, language in favorite_languages.items():
    print(name.title() + "'s favorite language is", language.title() + '.')
from collections import OrderedDict
from tkinter import (Tk, Frame, Label, Text, Canvas, Scrollbar, Spinbox,
                     Scale, Button, IntVar, LEFT, RIGHT, Y, YES)


class App:
    def __init__(self):
        self.root = Tk()
        self.block_size = IntVar()
        self.current_column_step = IntVar()
        self.block_size.set(3)
        self.isDecode = False

    def update_block_size(self):
        """Update the block size."""
        self.block_size.set(self.spin.get())

    def markArea(self, event):
        """Select a block with the mouse."""
        # Only if some table has already been drawn
        if self.isDecode and event.x > 30:
            self.firstCanvas.delete("hello")
            # Highlight the area that was clicked
            column_size = 270 // self.block_size.get()
            # Work out which column we are in
            our_column = event.x // column_size
            # Highlight it
            self.firstCanvas.create_rectangle(
                40 + our_column * column_size, 25,
                column_size + column_size * our_column + 20,
                30 + 15 * len(self.table[0]),
                outline="#ff0000", tags="hello")
            self.marked = our_column
            # Show the frequency-analysis data for this column
            self.draw_data(self.marked)
            if our_column < self.block_size.get():
                # Push the shift into the slider
                self.current_column_step.set(self.steps[self.marked])

    def peredecode(self, event):
        if self.isDecode and self.marked < self.block_size.get():
            self.steps[self.marked] = self.current_column_step.get()
            # Now redraw with the adjusted shift
            self.create_message(self.steps, self.table)

    def draw_data(self, item):
        self.fq.delete("all")
        column = 0
        j = 0
        for (i, letter) in enumerate(self.super_dicts[item].keys()):
            if 10 + 15 * j > 350:
                column += 1
                j = 0
            self.fq.create_text(
                20 + 30 * column, 10 + 15 * j,
                text=letter + ": " + str(self.super_dicts[item].get(letter)))
            j += 1

    def makeWidgets(self):
        self.firstFrame = Frame(self.root)
        self.firstFrame.pack(side=LEFT, anchor="nw", padx=10, pady=10)
        Label(self.firstFrame, text="Table for the source message").pack()
        self.firstAlterFrame = Frame(self.firstFrame)
        self.firstAlterFrame.pack()
        self.firstCanvas = Canvas(self.firstAlterFrame, width=300, height=500,
                                  bg="#ffffff")
        self.firstCanvas.bind("<Button-1>", self.markArea)
        self.firstCanvas.pack(side=LEFT)
        self.firstCanvasScroll = Scrollbar(self.firstAlterFrame,
                                           orient='vertical',
                                           command=self.firstCanvas.yview)
        self.firstCanvasScroll.pack(side=RIGHT, fill=Y, expand=True)
        self.firstCanvas.configure(yscrollcommand=self.firstCanvasScroll.set)
        Label(self.firstFrame, text="Enter the source message").pack(pady=10)
        self.firstText = Text(self.firstFrame, width=37, height=10)
        self.firstText.pack()
        self.secondFrame = Frame(self.root)
        self.secondFrame.pack(side=LEFT, anchor="n", padx=10, pady=10)
        Label(self.secondFrame, text="Table for the resulting message").pack()
        self.secondAlterFrame = Frame(self.secondFrame)
        self.secondAlterFrame.pack()
        self.secondCanvas = Canvas(self.secondAlterFrame, width=300,
                                   height=500, bg="#ffffff")
        self.secondCanvas.pack(side=LEFT)
        self.secondCanvasScroll = Scrollbar(self.secondAlterFrame,
                                            orient='vertical',
                                            command=self.secondCanvas.yview)
        self.secondCanvasScroll.pack(side=RIGHT, fill=Y, expand=True)
        self.secondCanvas.configure(yscrollcommand=self.secondCanvasScroll.set)
        Label(self.secondFrame, text="Final result").pack(pady=10)
        self.secondText = Text(self.secondFrame, width=37, height=10)
        self.secondText.pack()
        self.lastFrame = Frame(self.root)
        self.lastFrame.pack(side=LEFT, anchor="n", padx=10, pady=10)
        Label(self.lastFrame, text="Control panel").pack()
        Label(self.lastFrame, text="Block size").pack(pady=10)
        self.spin = Spinbox(self.lastFrame, from_=0,
                            textvariable=self.block_size, to=10,
                            command=self.update_block_size)
        self.spin.pack()
        Label(self.lastFrame, text="Frequency analysis").pack(pady=10)
        self.fq = Canvas(self.lastFrame, width=150, height=400, bg="#ffffff")
        self.fq.pack()
        # Our slider
        Label(self.lastFrame, text="Manual adjustment").pack(pady=10)
        self.scale = Scale(self.lastFrame, from_=0, to=33, length=150,
                           tickinterval=6, orient='horizontal', showvalue=YES,
                           variable=self.current_column_step,
                           command=self.peredecode)
        self.scale.pack()
        Button(self.lastFrame, text="Decrypt!", width=15, height=5,
               command=self.decode).pack(pady=20)

    def start(self):
        """Initialize the GUI."""
        self.root.title("Lab assignment no. 2")
        # self.root.geometry("900x500")
        self.makeWidgets()
        self.root.mainloop()

    def decode(self):
        self.isDecode = True
        self.super_dicts = []
        # Take the message from the text box and pack it into columns,
        # one column per block position.
        self.table = ['' for i in range(self.block_size.get())]
        message = self.firstText.get("1.0", 'end-1c')
        for (i, letter) in enumerate(message):
            self.table[i % self.block_size.get()] += letter
        self.draw(self.firstCanvas, self.table)
        # Build a frequency-analysis dictionary for every column.
        self.steps = [
            self.analis(self.table[i]) for i in range(self.block_size.get())
        ]
        # We now have an array of shifts (plus the frequency dictionaries);
        # every column must be shifted by its number. The actual shifting
        # happens in a separate function.
        self.create_message(self.steps, self.table)

    def create_message(self, steps, table):
        """Reassemble the original message."""
        self.alterTable = ['' for i in table]
        for i in range(len(table)):
            self.alterTable[i] = App.caesar(table[i], 33 - steps[i])
        self.draw(self.secondCanvas, self.alterTable)
        out = ''
        for i in range(len(self.alterTable[0])):
            for j in range(self.block_size.get()):
                try:
                    out += self.alterTable[j][i]
                except IndexError:
                    out += ''
        self.secondText.delete('1.0', "end")
        self.secondText.insert('1.0', out)

    @staticmethod
    def caesar(message, step):
        """Shift a message with the Caesar cipher.

        It could of course be a one-liner via bytes.translate, but I like
        this version better.
        """
        alpha = [
            ' ', 'а', 'б', 'в', 'г', 'д', 'е', 'ж', 'з', 'и', 'й', 'к', 'л',
            'м', 'н', 'о', 'п', 'р', 'с', 'т', 'у', 'ф', 'х', 'ц', 'ч', 'ш',
            'щ', 'ъ', 'ы', 'ь', 'э', 'ю', 'я'
        ]
        res = []
        for item in range(len(alpha)):
            if item + step >= len(alpha):
                res += alpha[item + step - len(alpha)]
            elif item + step < 0:
                res += alpha[len(alpha) - item + step]
            else:
                res += alpha[item + step]
        wow = list(zip(alpha, res))
        msq = ''
        for letter in message:
            for item in wow:
                if letter == item[0]:
                    msq += item[1]
                    break
            else:
                msq += letter
        return msq

    def analis(self, column):
        """Character frequency analysis."""
        alphabet = " абвгдежзийклмнопрстуфхцчшщъыьэюя"
        self.dicts = {letter: 0 for letter in alphabet}
        dict_step = {letter: i for (i, letter) in enumerate(alphabet)}
        for i in column:
            self.dicts[i] += 1
        self.dicts = OrderedDict(
            sorted(self.dicts.items(), key=lambda t: -t[1]))
        self.super_dicts.append(self.dicts)
        azaza = list(self.dicts.keys())
        return dict_step.get(azaza[0])

    def draw(self, obj, table):
        """Fill the canvas with the letter columns."""
        # The source table is ready; now fill the canvas.
        obj.delete("all")
        obj.create_line(30, 0, 30, 30 + len(table[0]) * 15, fill="#000000")
        obj.create_line(30, 20, 300, 20, fill="#000000")
        # Divide the remaining space into the columns.
        column_size = 270 // self.block_size.get()
        # Now place the column numbers and the message in a loop.
        for i in range(self.block_size.get()):
            obj.create_text(column_size / 1.2 + column_size * i, 10,
                            text=str(i + 1))
            for (j, letter) in enumerate(table[i]):
                obj.create_text(column_size / 1.2 + column_size * i,
                                30 + j * 15, text=letter)
        # Row numbering
        for i in range(len(table[0])):
            obj.create_text(10, 30 + i * 15, text=str(i + 1))
        obj.configure(scrollregion=obj.bbox("all"))
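# Quick check of the cipher itself, no GUI needed: a shift of 1 moves every
# letter one position along the 33-character alphabet defined above
# (space sits at position 0).
print(App.caesar("абв", 1))  # -> "бвг"
print(App.caesar("я", 1))    # wraps around -> " "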
from collections import OrderedDict

favorite_languages = OrderedDict()
favorite_languages['jen'] = 'python'
favorite_languages['sarah'] = 'c'
favorite_languages['edward'] = 'ruby'
favorite_languages['phil'] = 'python'

# Since we used the ordered dictionary class from the standard library, it
# will print the dictionary in the order we entered it.
for name, language in favorite_languages.items():
    print(name.title() + "'s favorite language is " + language.title())

# You can still sort the ordered dictionary if you like.
for name, language in sorted(favorite_languages.items()):
    print(name.title() + "'s favorite language is " + language.title())
from collections import OrderedDict

command = input()
courses = dict()
while command != "end":
    command = command.split(' : ')
    course = command[0]
    student = command[1]
    if course not in courses:
        courses[course] = [student]
    else:
        courses[course].append(student)
    command = input()

# Order courses by number of enrolled students, descending.
ordered = OrderedDict(
    sorted(courses.items(), key=lambda c: len(c[1]), reverse=True))
for k, v in ordered.items():
    print(f"{k}: {len(v)}")
    for value in sorted(v):
        print(f"-- {value}")
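# Sample session for the script above:
#   input:  Java : John
#           Python : Kate
#           Java : Mike
#           end
#   output: Java: 2
#           -- John
#           -- Mike
#           Python: 1
#           -- Kate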
def _report_textual_results(self, tally_coll, res_dir):
    '''
    Given a sequence of tallies with results from a series of batches,
    create long outputs and inputs lists from all tallies.

    Computes information-retrieval type values:
        precision (macro/micro/weighted/by-class)
        recall    (macro/micro/weighted/by-class)
        f1        (macro/micro/weighted/by-class)
        accuracy
        balanced_accuracy

    Combines these results into a Pandas Series and writes them to a csv
    file. That file's path is constructed from the passed-in res_dir,
    appended with 'ir_results.csv'.

    Finally, constructs Github-flavored tables from the above results and
    posts them to the 'text' tab of tensorboard.

    Returns the results measures Series.

    :param tally_coll: collection of tallies from batches
    :type tally_coll: ResultCollection
    :param res_dir: directory where all .csv and other result files are
        to be written
    :type res_dir: str
    :return: results of information-retrieval-like measures
    :rtype: pandas.Series
    '''
    all_preds = []
    all_labels = []
    for tally in tally_coll.tallies(phase=LearningPhase.TESTING):
        all_preds.extend(tally.preds)
        all_labels.extend(tally.labels)

    res = OrderedDict({})
    res['prec_macro'] = precision_score(all_labels, all_preds,
                                        average='macro', zero_division=0)
    res['prec_micro'] = precision_score(all_labels, all_preds,
                                        average='micro', zero_division=0)
    res['prec_weighted'] = precision_score(all_labels, all_preds,
                                           average='weighted',
                                           zero_division=0)
    res['prec_by_class'] = precision_score(all_labels, all_preds,
                                           average=None, zero_division=0)
    res['recall_macro'] = recall_score(all_labels, all_preds,
                                       average='macro', zero_division=0)
    res['recall_micro'] = recall_score(all_labels, all_preds,
                                       average='micro', zero_division=0)
    res['recall_weighted'] = recall_score(all_labels, all_preds,
                                          average='weighted', zero_division=0)
    res['recall_by_class'] = recall_score(all_labels, all_preds,
                                          average=None, zero_division=0)
    res['f1_macro'] = f1_score(all_labels, all_preds,
                               average='macro', zero_division=0)
    res['f1_micro'] = f1_score(all_labels, all_preds,
                               average='micro', zero_division=0)
    res['f1_weighted'] = f1_score(all_labels, all_preds,
                                  average='weighted', zero_division=0)
    res['f1_by_class'] = f1_score(all_labels, all_preds,
                                  average=None, zero_division=0)
    res['accuracy'] = accuracy_score(all_labels, all_preds)
    res['balanced_accuracy'] = balanced_accuracy_score(all_labels, all_preds)

    res_series = pd.Series(list(res.values()), index=list(res.keys()))

    # Write information-retrieval type results to a one-line .csv file,
    # using a pandas Series as convenient intermediary:
    res_csv_path = os.path.join(res_dir, 'ir_results.csv')
    res_series.to_csv(res_csv_path)

    res_rnd = {}
    for meas_nm, meas_val in res.items():
        # Measure results are either floats (precision, recall, etc.),
        # or np arrays (e.g. precision-per-class). For both cases,
        # round each measure to one digit:
        res_rnd[meas_nm] = round(meas_val, 1) if isinstance(meas_val, float) \
            else meas_val.round(1)

    ir_measures_skel = {
        'col_header': ['precision', 'recall', 'f1'],
        'row_labels': ['macro', 'micro', 'weighted'],
        'rows': [[res_rnd['prec_macro'], res_rnd['recall_macro'],
                  res_rnd['f1_macro']],
                 [res_rnd['prec_micro'], res_rnd['recall_micro'],
                  res_rnd['f1_micro']],
                 [res_rnd['prec_weighted'], res_rnd['recall_weighted'],
                  res_rnd['f1_weighted']]]
    }

    ir_per_class_rows = [
        [prec_class, recall_class, f1_class]
        for prec_class, recall_class, f1_class in zip(
            res_rnd['prec_by_class'], res_rnd['recall_by_class'],
            res_rnd['f1_by_class'])
    ]
    ir_by_class_skel = {
        'col_header': ['precision', 'recall', 'f1'],
        'row_labels': self.class_names,
        'rows': ir_per_class_rows
    }

    accuracy_skel = {
        'col_header': ['accuracy', 'balanced_accuracy'],
        'row_labels': ['Overall'],
        'rows': [[res_rnd['accuracy'], res_rnd['balanced_accuracy']]]
    }

    ir_measures_tbl = GithubTableMaker.make_table(ir_measures_skel,
                                                  sep_lines=False)
    ir_by_class_tbl = GithubTableMaker.make_table(ir_by_class_skel,
                                                  sep_lines=False)
    accuracy_tbl = GithubTableMaker.make_table(accuracy_skel,
                                               sep_lines=False)

    # Write the markup tables to Tensorboard:
    self.writer.add_text('Information retrieval measures', ir_measures_tbl,
                         global_step=0)
    self.writer.add_text('Per class measures', ir_by_class_tbl,
                         global_step=0)
    self.writer.add_text('Accuracy', accuracy_tbl, global_step=0)

    return res_series
import re
import string
from collections import Counter, OrderedDict

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# miTexto is the source file object, opened earlier (not part of this
# fragment).
texto = miTexto.read().lower()  # read the file and lowercase everything
# Strip single quotes, double quotes, parentheses, etc.
result = re.sub(r'[^\w\s]', '', texto)
# A "list" of Spanish stop words.
stop_words = set(stopwords.words('spanish'))
# Split the text into word tokens.
word_tokens = word_tokenize(result)
# Drop punctuation tokens.
word_tokens = list(
    filter(lambda token: token not in string.punctuation, word_tokens))
filtro = []  # array that will serve as a filter below
for palabra in word_tokens:
    # Keep the word only if it is not a stop word.
    if palabra not in stop_words:
        filtro.append(palabra)
c = Counter(filtro)  # count repeated words in the filtered list
# Ask the user how many of the most repeated words to show.
val = input("Enter the number of words to display: ")
try:
    # Validate that the input is a number, not a string.
    num = int(val)
    # Order from most to least repeated, keeping only the requested number.
    y = OrderedDict(c.most_common(num))
    print(y)
    # Create/overwrite a file called revision.txt with the words.
    with open('revision.txt', 'w', encoding="utf-8") as file:
        # "k" is the word and "v" its count; only the word is written.
        for k, v in y.items():
            file.write(k + "\n")
except ValueError:
    # The requested word count was not an integer.
    print("The input is not an integer.")
except FileNotFoundError:
    # No file exists at the given path.
    print("The file does not exist at the given path.")
# Collections.OrderedDict()
from collections import OrderedDict

OD = OrderedDict()
N = int(input())
for i in range(N):
    item_price = input().split()
    item = ''
    price = ''
    # The alphabetic tokens form the item name; the numeric ones, the price.
    for j in item_price:
        if j.isalpha():
            item = item + j + ' '
        else:
            price = price + j
    price = int(price)
    item = item[:-1]  # drop the trailing space
    if item in OD:
        OD[item] = OD[item] + price
    else:
        OD[item] = price
for i, j in OD.items():
    print(i, j)
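# Sample run:
#   3
#   BANANA FRIES 12
#   POTATO CHIPS 30
#   BANANA FRIES 5
# Output -- net price per item, in order of first appearance:
#   BANANA FRIES 17
#   POTATO CHIPS 30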
"Gibrael": { "age": 10, "address": "Sesimbra" }, "Susan": { "age": 11, "address": "Lisbon" }, "Charles": { "age": 9, "address": "Sesimbra" }, } # use the items() method to return a dictionary's keys their values for k, v in dict1.items(): print(k, v) # to iterate over the values for v in dict2.values(): print(v) # to iterate over the keys: for k in dict3: print(k) # to iterate over a nested dictionary, just remember that the value of # the parent dictionary is second dictionary that you must extract the key,value pair from for k1, v1 in dict3.items(): print(k1)
from collections import OrderedDict

# The first lines are missing; presumably d was an OrderedDict already filled
# with 'a', 'b', 'c' before this point (assumed reconstruction):
d = OrderedDict()
d['a'] = 1
d['b'] = 2
d['c'] = 3
d['d'] = 4
d['e'] = 5
print(d)
for k, v in d.items():
    print(k, v)

# Note: 'dict' shadows the built-in type here; kept as in the original.
dict = OrderedDict()
dict['a'] = 1
dict['d'] = 4
dict['e'] = 5
dict['b'] = 2
dict['c'] = 3
for k, v in dict.items():
    print(k, v)

# OrderedDict equality is order-sensitive: same pairs, different insertion
# order, so these compare as different.
if d == dict:
    print('Equals')
else:
    print('Different')

dict2 = OrderedDict()
dict2['a'] = 1
dict2['d'] = 4
dict2['b'] = 2
dict2['e'] = 5
dict2['c'] = 3
# The original snippet breaks off at this comparison; the body below mirrors
# the earlier one.
if dict2 == dict:
    print('Equals')
else:
    print('Different')
# There is a log file with IP addresses. Analyse the last N addresses and
# save "ip address - number of requests" pairs to a new file.
from collections import OrderedDict, defaultdict, deque

N = 3000

with open('big_log.txt', 'r', encoding='utf-8') as f:
    log = deque(f, N)  # the deque keeps only the last N lines

print(log)

data = OrderedDict()
spam = defaultdict(int)
for item in log:
    ip = item[:-1]  # strip the trailing newline
    # Count requests only from addresses outside the local 192.168 range.
    if not ip.startswith('192.168'):
        spam[ip] += 1
        data[ip] = 1

print(spam)
print(data)
data.update(spam)  # replace the placeholder 1s with the real counts
print(data)

with open('data.txt', 'w', encoding='utf-8') as f:
    for key, value in data.items():
        f.write(f'{key} - {value}\n')
# Fragment: the opening of the enclosing per-class-drop helper is missing;
# lib, download, get_relative_drop, get_worst, cifar10_classes, np and plt
# are defined elsewhere in the original module.
        target_drop = rel_drop[i]
        rel_drop[i] = 0
        trackers[cls].add(min(np.min(rel_drop), 0) / target_drop * 100)
    return [trackers[k].get() for k in cifar10_classes]


runs = OrderedDict()
runs["Simple"] = "cifar10"
runs["No dropout"] = "cifar10_no_dropout"
runs["Resnet 110"] = "cifar10_resnet"

rel_drops = {}
worsts = {}
for k, v in runs.items():
    r = lib.get_runs([v])
    print("Downloading", k)
    download(v, r)
    rel_drops[k] = get_relative_drop(v, r)
    worsts[k] = get_worst(v, r)


def draw_stats(stats, shape):
    fig = plt.figure(figsize=shape)
    for i, k in enumerate(runs.keys()):
        stat = stats[k]
        # One bar per class, with runs interleaved and a one-slot gap
        # between class groups.
        plt.bar([x * (len(runs) + 1) + i for x in range(len(stat))],
                [s.mean for s in stat],
                yerr=[s.std for s in stat],
                align='center')
    plt.xticks([(len(runs) + 1) * x + (len(runs) - 1) / 2
                for x in range(len(cifar10_classes))], cifar10_classes)
    for tick in fig.axes[0].xaxis.get_major_ticks()[1::2]:
        pass  # the fragment breaks off inside this loop
from collections import OrderedDict

from django.contrib import messages
from django.shortcuts import render

# Project and SeqencingFile are this app's models.


def importSeqFiles(request, prj_pk):
    template_name = 'importFiles.html'
    excel_file = request.FILES['excel_file']
    excel_file_content = excel_file.read().decode("utf-8")
    lines = excel_file_content.rstrip().split("\n")
    project = Project.objects.get(pk=prj_pk)
    exsisting_files = SeqencingFile.objects.filter(
        project=project).values_list('sequencingFile_mainPath', flat=True)
    runDict = OrderedDict()
    dupDict = OrderedDict()
    orderList = []
    context = {}
    count = 1
    for line in lines:
        line = line.rstrip("\r")
        v = line.split(",")
        if count == 1:
            # Header row: record which columns are present and where.
            exName = seqRun = filePath = md5sum = sha256 = False
            runName = expName = path = ""
            if "Experiment Name" in v:
                exName = True
                exNameIndx = v.index("Experiment Name")
                orderList.append("Experiment")
            if "Sequencing Run Name" in v:
                seqRun = True
                seqRunIndx = v.index("Sequencing Run Name")
                orderList.append("Run")
            if "File Path" in v:
                filePath = True
                filePathIndx = v.index("File Path")
            if "md5sum" in v:
                md5sum = True
                md5sumIndx = v.index("md5sum")
                orderList.append("md5sum")
            if "sha256sum" in v:
                sha256 = True
                sha256Indx = v.index("sha256sum")
                orderList.append("sha256sum")
        else:
            if seqRun and v[seqRunIndx] != "":
                runName = v[seqRunIndx]
            if exName and v[exNameIndx] != "":
                expName = v[exNameIndx]
            if filePath and v[filePathIndx] != "":
                path = v[filePathIndx]
            if expName != "" and path != "":
                runDict[v[filePathIndx]] = [expName, runName]
            else:
                messages.error(
                    request, "something is not correct with your input file.")
                break
            if md5sum and v[md5sumIndx] != "":
                md5Sum = v[md5sumIndx]
                runDict[v[filePathIndx]].append(md5Sum)
                context['md5sum'] = True
            else:
                context['md5sum'] = False
            if sha256 and v[sha256Indx] != "":
                sha256sum = v[sha256Indx]
                runDict[v[filePathIndx]].append(sha256sum)
                context['sha256sum'] = True
            else:
                context['sha256sum'] = False
            # Files already present in the project go to the duplicate dict.
            if path in exsisting_files:
                dupDict[path] = runDict[path]
                del runDict[path]
        count += 1
    if " " in runDict:
        del runDict[" "]
    runDictSorted = sorted(runDict.items())
    dupDictSorted = sorted(dupDict.items())
    context['runDict'] = runDictSorted
    context['dupDict'] = dupDictSorted
    context['orderList'] = orderList
    context['project'] = project
    # As in the other import view, a Django view must return a response,
    # not a bare tuple.
    return render(request, template_name, context)
class Transect(sm.CustomObject):
    base_type = "transect"
    type = "transect"
    datum = "top of face"

    def __init__(self, **kwargs):
        super(Transect, self).__init__()
        self._locs = OrderedDict()
        self.name = kwargs.get("name", None)
        start = kwargs.get("start", (0, 0))  # coords (lat, long)
        end = kwargs.get("end", (0, 0))
        self.s_coords = Coords(lat=start[0], lon=start[1])
        self.e_coords = Coords(lat=end[0], lon=end[1])
        self.ug_values = []
        self.ug_xs = []
        self.h_face = kwargs.get("h_face", None)
        self.av_ground_slope = kwargs.get("av_ground_slope", None)
        self._extra_class_inputs = [
            "locs", "start", "end", "ug_values", "ug_xs", "h_face",
            "av_ground_slope", "datum"
        ]
        self.inputs = self.inputs + self._extra_class_inputs

    def add_cpt_by_coords(self, cpt, coords, **kwargs):
        esp = kwargs.get("esp", None)
        loc = Loc(cpt=cpt, name=cpt.file_name, esp=esp)
        loc.coords = coords
        return self.add_loc_by_coords(coords, loc)

    def add_cpt(self, cpt, x, **kwargs):
        offset = kwargs.get("offset", None)
        off_dir = kwargs.get("off_dir", "-")
        esp = kwargs.get("esp", None)
        loc = Loc(cpt=cpt, name=cpt.file_name, offset=offset,
                  off_dir=off_dir, esp=esp)
        return self.add_loc(x, loc)

    def get_cpt_names(self):
        _cpts = []
        for x in self.locs:
            _cpts.append(self.locs[x].cpt_file_name)
        return _cpts

    def set_ids(self):
        for i, loc_name in enumerate(self.locs):
            self.locs[loc_name].id = i + 1
            if self.locs[loc_name].soil_profile is not None:
                self.locs[loc_name].soil_profile.id = i + 1

    def to_dict(self, extra=(), **kwargs):
        outputs = OrderedDict()
        skip_list = ["locs"]
        if hasattr(self, "inputs"):
            full_inputs = list(self.inputs) + list(extra)
        else:
            full_inputs = list(extra)
        for item in full_inputs:
            if item not in skip_list:
                value = self.__getattribute__(item)
                outputs[item] = sf.collect_serial_value(value)
        return outputs

    def add_to_dict(self, models_dict, **kwargs):
        if self.base_type not in models_dict:
            models_dict[self.base_type] = OrderedDict()
        outputs = self.to_dict(**kwargs)
        models_dict[self.base_type][self.unique_hash] = outputs
        for loc_num in self.locs:
            self.locs[loc_num].add_to_dict(
                models_dict,
                parent_dict=models_dict[self.base_type][self.unique_hash])

    def reset_cpt_folder_paths(self, folder_path):
        for loc_name in self.locs:
            self.locs[loc_name].cpt_folder_path = folder_path

    @property
    def tran_line(self):
        try:
            from liquepy.spatial.map_coords import Line
            return Line(self.s_coords, self.e_coords)
        except ImportError as e:
            warnings.warn('Need to import spatial packages', stacklevel=3)
            warnings.warn(str(e), stacklevel=3)
            return None

    @property
    def x_end(self):
        return self.tran_line.dist

    @property
    def locs(self):
        return self._locs

    def add_loc(self, x: float, loc):
        loc.x = x
        self._locs[x] = loc
        self._sort_locs()
        return self._locs[x]

    def add_loc_by_coords(self, coords, loc):
        from liquepy.spatial import map_coords
        if not sum(self.start) or not sum(self.end):
            raise ValueError("start and end coordinates must be set")
        loc.x = map_coords.calc_proj_line_dist(self.tran_line, coords)
        loc.offset = map_coords.calc_line_offset(self.tran_line, coords)
        loc.off_dir = map_coords.calc_line_off_dir(self.tran_line, coords)
        self._locs[loc.x] = loc
        self._sort_locs()
        return self._locs[loc.x]

    @locs.setter
    def locs(self, locs):
        for loc_id in locs:
            loc_dist = locs[loc_id]["x"]
            self.locs[loc_dist] = Loc()
            sm.add_to_obj(self.locs[loc_dist], locs[loc_id])

    def _sort_locs(self):
        """Sort the locs by distance.

        :return: None
        """
        self._locs = OrderedDict(
            sorted(self._locs.items(), key=lambda t: t[0]))

    def get_loc_by_name(self, name):
        for x in self.locs:
            if self.locs[x].name == name:
                return self.locs[x]

    def get_loc_by_dist(self, dist):
        return self.locs[dist]

    def loc(self, index):
        index = int(index)
        if index == 0:
            raise KeyError("index=%i, but must be 1 or greater." % index)
        return list(self._locs.values())[index - 1]

    def remove_loc(self, loc_int):
        key = list(self._locs.keys())[loc_int - 1]
        del self._locs[key]

    def replace_loc(self, loc_int, soil):
        key = list(self._locs.keys())[loc_int - 1]
        self._locs[key] = soil

    @property
    def start(self):
        return self.s_coords.as_tuple

    @property
    def end(self):
        return self.e_coords.as_tuple

    @start.setter
    def start(self, values):
        self.s_coords = Coords(lat=values[0], lon=values[1])

    @end.setter
    def end(self, values):
        self.e_coords = Coords(lat=values[0], lon=values[1])
from collections import OrderedDict

# print("Before:\n")
# od = OrderedDict()
# od['a'] = 1
# od['b'] = 2
# od['c'] = 3
# od['d'] = 4
# for key, value in od.items():
#     print(key, value)
#
# print("\nAfter:\n")
# od['c'] = 5
# for key, value in od.items():
#     print(key, value)

input_string = 'aabbccddef'
ordered_dict = OrderedDict()
for char in input_string:  # 'char' avoids shadowing the built-in input()
    if char not in ordered_dict:
        ordered_dict[char] = 1
    else:
        ordered_dict[char] += 1

# Build a run-length style summary: 'a2b2c2d2e1f1' for the string above.
out_string = ""
for key, value in ordered_dict.items():
    out_string = out_string + str(key) + str(value)
print(out_string)
from collections import OrderedDict

favorite_languages = OrderedDict()
favorite_languages['jen'] = 'python'
favorite_languages['sarah'] = 'c'
favorite_languages['edward'] = 'ruby'
favorite_languages['phil'] = 'python'

for nome, linguagen in favorite_languages.items():
    print("\t{}'s favorite language is {}.".format(nome.title(),
                                                   linguagen.title()))
from collections import OrderedDict

# The opening of this snippet is missing. Judging by the body below, it was
# something like (hypothetical reconstruction):
#
#   n = int(input())
#   cars = {}                       # model -> [mileage, fuel]
#   for _ in range(n):
#       model, mileage, fuel = input().split("|")
#       cars[model] = [int(mileage), int(fuel)]
#   command = input()
#   while command != "Stop":
#       command = command.split(" : ")
#       todo = command[0]
#       if todo == "Drive":
#           car, distance, fuel = command[1], int(command[2]), int(command[3])
#           ...
        cars[car][1] -= fuel
        print(f"{car} driven for {distance} kilometers. "
              f"{fuel} liters of fuel consumed.")
        # A car is sold once its mileage reaches 100000.
        if cars[car][0] >= 100000:
            print(f"Time to sell the {car}!")
            del cars[car]
    elif todo == "Refuel":
        car = command[1]
        fuel = int(command[2])
        # The tank holds at most 75 liters; clamp the refuel amount.
        if cars[car][1] + fuel > 75:
            diff = (cars[car][1] + fuel) - 75
            newdiff = fuel - diff
            cars[car][1] += newdiff
            print(f"{car} refueled with {newdiff} liters")
        else:
            cars[car][1] += fuel
            print(f"{car} refueled with {fuel} liters")
    else:
        car = command[1]
        km = int(command[2])
        # Mileage never drops below 10000.
        if cars[car][0] - km <= 10000:
            cars[car][0] = 10000
        else:
            cars[car][0] -= km
            print(f"{car} mileage decreased by {km} kilometers")
    command = input()

# Sort by [mileage, fuel], descending, and print the final state.
sorted_cars = OrderedDict(
    sorted(cars.items(), key=lambda x: x[1], reverse=True))
for k, v in sorted_cars.items():
    print(f"{k} -> Mileage: {v[0]} kms, Fuel in the tank: {v[1]} lt.")