def xor_strings(a, b):
    """XOR two operands and return the result as an integer.

    Each operand may be an integer (converted with ``long_to_bytes``), a
    bytes object or a text string.  The shorter operand is passed through
    ``extend_string`` together with the longer operand's length, then both
    are converted with ``bytes_to_long`` and XOR-ed.

    Args:
        a: First operand (int, bytes or str).
        b: Second operand (int, bytes or str).

    Returns:
        ``bytes_to_long(a) ^ bytes_to_long(b)`` after length alignment.
    """
    if isint(a):
        a = long_to_bytes(a)
    if isint(b):
        b = long_to_bytes(b)

    # Lengths are taken before decoding, so byte operands are measured in
    # bytes.
    len_a = len(a)
    len_b = len(b)

    # Remember which operands arrived as raw bytes: they are decoded as
    # latin-1 (one character per byte) and must be re-encoded the same way.
    # Re-encoding them with the default UTF-8 codec turned every byte
    # >= 0x80 into two bytes and corrupted the XOR result.
    a_was_bytes = hasattr(a, 'decode')
    b_was_bytes = hasattr(b, 'decode')
    if a_was_bytes:
        a = a.decode('latin-1')
    if b_was_bytes:
        b = b.decode('latin-1')

    if len_a > len_b:
        b = extend_string(b, len_a)
    elif len_b > len_a:
        a = extend_string(a, len_b)

    # Text operands keep the original default encoding.
    if hasattr(a, 'encode'):
        a = a.encode('latin-1') if a_was_bytes else a.encode()
    if hasattr(b, 'encode'):
        b = b.encode('latin-1') if b_was_bytes else b.encode()

    return bytes_to_long(a) ^ bytes_to_long(b)
def prepare_queryset_kwargs(self, field, value, negate):
    """Build the single-entry filter dict for ``field``.

    A comma-separated ``value`` is split into a list and uses the ``nin``
    operator when negated, otherwise ``self.op``.  A single value uses
    ``ne`` when negated and no operator otherwise.  Anything ``isint``
    accepts is converted with ``int``.

    Returns:
        ``{"<field>__<op>": value}``, or ``{"<field>": value}`` when the
        operator is empty.
    """
    if ',' in value:
        # only use 'in' or 'nin' if multiple values are specified
        value = value.split(',')
        operator = 'nin' if negate else self.op
    else:
        operator = 'ne' if negate else ''

    if type(value) is list:
        for position, item in enumerate(value):
            if isint(item):
                value[position] = int(item)
    elif isint(value):
        value = int(value)

    lookup = '__'.join(part for part in (field, operator) if part)
    return {lookup: value}
def encode(s, **kwargs):
    """XOR ``s`` with the ``key`` keyword argument and format the result.

    Both ``s`` and ``key`` are strings; a ``0x`` prefix marks a hexadecimal
    integer.  When both operands are integers (per ``isint``) they are
    combined with ``xor_int``, otherwise with ``xor_strings``.

    Args:
        s: Input string.
        **kwargs: Must contain a non-empty ``key`` string.

    Returns:
        The value of ``format_result(result, hex(result), text)``, or
        ``None`` (after printing a message) when no key was supplied.
    """
    # .get() so that a missing key is reported like an empty one instead of
    # raising KeyError before the message can be printed.
    key = kwargs.get('key')
    if not key:
        print("Key not provided")
        return None

    if s.startswith('0x'):
        s = int(s[2:], 16)
    if key.startswith('0x'):
        key = int(key[2:], 16)

    if isint(s) and isint(key):
        result = xor_int(s, key)
    else:
        result = xor_strings(s, key)
    return format_result(result, hex(result), long_to_bytes(result).decode('latin-1'))
def encode(s, **kwargs):
    """Return the base64 text for ``s``.

    A ``0x``-prefixed string is parsed as a hexadecimal integer.  Integer
    input (per ``isint``) is converted with ``long_to_bytes``; any other
    input is encoded with ``str.encode()``.  ``kwargs`` is accepted but not
    used.
    """
    value = int(s[2:], 16) if s.startswith('0x') else s
    raw = long_to_bytes(value) if isint(value) else value.encode()
    return b64encode(raw).decode()
def parse_listkey(self, arg):
    """Turn a user-supplied list argument into a list key.

    An integer-like ``arg`` (per ``isint``) is a day offset relative to
    today and is returned as a count of days since 1970-01-01, based on the
    naive ``datetime.now()``.  Any other ``arg`` is returned stripped of
    surrounding whitespace.
    """
    if not isint(arg):
        return arg.strip()
    relative_days = int(arg)
    today = (datetime.now() - datetime(1970, 1, 1)).days
    return relative_days + today
def test_utils_all():
    """Exercise three ``utils`` helpers and print what each returns.

    The calls are made one at a time, in order, so ``get_int_input`` is
    invoked only after the first two results have been printed.
    """
    from utils import isint, set_at, get_int_input

    isint_result = isint([5])
    print("testing: isint:", isint_result)

    set_at_result = set_at("ABS!", 2, "C")
    print("testing: set_at:", set_at_result)

    int_input = get_int_input(prompt="GIVE ME IINNTT!!",
                              error_message="F**K YOU, INT I TOLD!")
    print("testing: get_int_input:", int_input)
def __getitem__(self, daykey):
    """Return the list stored under ``daykey``.

    Keys present in ``self.extra_lists`` are served from there.  Any other
    key must be integer-like; it is converted with ``int`` and looked up in
    ``self.days``, where an empty list is created on first access.
    """
    assert isint(daykey) or daykey in self.extra_lists
    if daykey in self.extra_lists:
        return self.extra_lists[daykey]

    day = int(daykey)
    if day not in self.days:
        self.days[day] = []
    return self.days[day]
def decode(ciphertext, **kwargs):
    """Decode a sequence of two-digit coordinates using the module ``table``.

    The ciphertext is split on the ``key`` keyword argument when one is
    given, otherwise on whitespace.  Hyphens are removed from every token
    longer than one character.  A token that ``isint`` accepts is looked up
    as ``table[first_digit - 1][second_digit - 1]``; every other token is
    copied through unchanged.

    Args:
        ciphertext: Text to decode.
        **kwargs: Optional ``key`` used as the token separator.

    Returns:
        The decoded text.
    """
    # The separator is optional, so a missing ``key`` must behave like an
    # empty one instead of raising KeyError.
    key = kwargs.get('key')
    tokens = ciphertext.split(key) if key else ciphertext.split()

    plaintext = []
    for token in tokens:
        if '-' in token and len(token) > 1:
            token = token.replace('-', '')
        # A coordinate needs two digits; shorter tokens are copied through
        # rather than crashing on token[1].
        if len(token) < 2 or not isint(token):
            plaintext.append(token)
            continue
        plaintext.append(table[int(token[0]) - 1][int(token[1]) - 1])
    return "".join(plaintext)
def prepare_queryset_kwargs(self, field, value, negate):
    """
    Function: prepare_queryset_kwargs
    Summary: Prepare the query for lookups that must search inside a list
        of dictionaries.
    Examples: key=[field_dict].[value]
    Attributes:
        field: name of the list field.
        value: "<dict_field>.<values>", where <values> is one value or a
            comma-separated list; empty items of a comma list are dropped.
        negate: accepted but not used by this implementation.
    Returns: dict with a single ``__raw__`` entry holding an ``$in`` query
        on "<field>.<dict_field>".
    """
    dict_field, vals = value.split('.')
    candidates = [x for x in vals.split(',') if x] if ',' in vals else [vals]
    value = [int(x) if isint(x) else x for x in candidates]
    return dict(__raw__={field + '.' + dict_field: {"$in": value}})
def on_btn_ok(self, sender):
    """Split ``self.subs`` into the number of parts chosen in the combo box.

    One ``cfile`` is created per part from ``self.filename``, renamed to
    ``"<base> - part NN"`` and moved into a ``split`` sub-directory.  The
    subtitles are then written in equally sized consecutive slices, the
    last of which may be shorter.  The dialog is closed in every case.

    Args:
        sender: The widget that triggered the handler (unused).
    """
    pnum = self.combo.get_active_text()
    if not isint(pnum):
        self.close()
        return

    parts = int(pnum)
    if parts < 1:
        # Zero or negative parts would crash on files[0] and on the
        # division inside ceildiv; there is nothing to write.
        self.close()
        return

    files = [cfile(self.filename) for _ in range(parts)]
    for number, part in enumerate(files, start=1):
        part.change_base(part.base + ' - part ' + str(number).zfill(2))
        part.add_subdir('split')

    # Only the first part is checked before creating the directory.
    if not files[0].path_exists:
        files[0].create_path()

    subs_per_part = ceildiv(len(self.subs), parts)
    for idx, part in enumerate(files):
        begin = idx * subs_per_part
        partfile = srtFile(part.full_path)
        partfile.write_to_file(self.subs[begin:begin + subs_per_part])
    self.close()
def compute(self, aitem: Union[int, PeakEventsTuple]) -> FitBead:
    """Action applied to the frame.

    The configuration is re-resolved whenever the track path differs from
    the one recorded in ``self._resolved``.  ``aitem`` is either a bead
    identifier, whose events are fetched from ``self.data``, or a
    ``(bead, events)`` pair.
    """
    if getattr(self, '_resolved', None) != getattr(self.track, 'path', None):
        self.config = self.config.resolve(self.track.path)
        self._resolved = self.track.path

    if not isint(aitem):
        bead, inp = cast(PeakEventsTuple, aitem)
    else:
        bead = cast(int, aitem)
        inp = cast(PeakEvents, cast(dict, self.data)[bead])

    events = self.__topeaks(inp)
    baseline = self.__baseline(bead, inp)
    singlestrand = self.__singlestrand(bead, inp)
    has_baseline = baseline is not None
    has_singlestrand = singlestrand is not None
    dist = self.distances(bead, events, has_baseline, has_singlestrand)
    return self.__beadoutput(bead, events, dist, (baseline, singlestrand))
def memory_str(memory, p=-1, executing_char='', sep="|", marker='', s1=2, s2=3, mode="", filter_0=True):
    """Render ``memory`` as one line of ``sep``-terminated cells.

    The cell at index ``p % len(memory)`` is drawn as the marker plus
    ``executing_char`` (width ``s1``) followed by the cell value (width
    ``s2``).  Other cells use the combined width; zero cells are blanked
    when ``filter_0`` is set and ``executing_char`` is not ``'o'``.

    ``mode="int"`` converts integer-like cells with ``int``.
    ``mode="bool"`` renders bool cells as ``"T"``/``"F"``;
    ``mode="boolean"`` does so for every cell.

    Formatting is delegated to the external helper ``f(value, width, width)``.
    """
    pieces = []
    for index, raw in enumerate(memory):
        cell = raw
        if mode == "int" and isint(cell):
            cell = int(cell)
        elif (mode == "bool" and type(cell) == type(True)) or mode == "boolean":
            # Deliberately `==`: the value 0 also counts as false here.
            cell = "F" if cell == False else "T"

        if index == p % len(memory):
            pieces.append(f(marker + executing_char, s1, s1) + f(str(cell), s2, s2) + sep)
        elif raw == 0 and filter_0 and executing_char != 'o':
            pieces.append(" " * (s1 + s2) + sep)
        else:
            pieces.append(f(cell, s1 + s2, s1 + s2) + sep)
    return "".join(pieces)
def restore_or_init(net, logger, dest_dir, args):
    """Restore ``net`` and ``logger`` from a checkpoint, or start fresh.

    ``args.restore`` is either an integer-like iteration number, looked up
    among the saves of ``dest_dir``, or a path whose two components (per
    ``utils.parent_dir``) give the source directory and the iteration.  A
    relative source directory is resolved against the parent of
    ``dest_dir``.  Iteration ``-1`` selects the last entry of
    ``utils.get_saves``.

    Training starts from scratch (nothing is loaded) when ``-1`` is
    requested in ``dest_dir`` and no save exists, or when iteration ``0``
    is requested and not found.

    Returns:
        The iteration to start from (``0`` when starting from scratch).

    Raises:
        ValueError: If the requested checkpoint cannot be found.
    """
    if utils.isint(args.restore):
        restore_fromthis = True
        restore_from, restore_iter = dest_dir, args.restore
    else:
        restore_fromthis = False
        restore_from, restore_iter = utils.parent_dir(args.restore)
        if not osp.isabs(restore_from):
            restore_from = osp.join(utils.parent_dir(dest_dir)[0], restore_from)

    saved = utils.get_saves(restore_from)
    restore_iter = int(restore_iter)

    # `found` stays None when training has to start from scratch.
    found = None
    if restore_iter == -1:
        if saved:
            found = saved[-1]
        elif not restore_fromthis:
            raise ValueError('No checkpoints found in {}'.format(restore_from))
    else:
        for candidate_iter, candidate_dir in saved:
            if candidate_iter == restore_iter:
                found = (candidate_iter, candidate_dir)
                break
        if found is None and restore_iter != 0:
            raise ValueError('Checkpoint {} not found in {}'.format(restore_iter, restore_from))

    if found is None:
        return 0

    start_iter, iter_dir = found
    snap_dest = osp.join(iter_dir, 'state_dict.pth')
    # map to cpu in case the optim was done with different devices
    print("Restoring net and logger state from", snap_dest)
    saved_state_dict = torch.load(snap_dest, map_location=lambda storage, loc: storage)
    if hasattr(saved_state_dict, '_OrderedDict__root'):
        load_weights(net, saved_state_dict)
    else:
        net.initialize_from_file(snap_dest)
    logger.restore(iter_dir)
    return start_iter
def test_hypothesis(**kwargs):
    """
    Test a hypothesis on examples.

    How to call:
    - With no kwargs: check the hypothesis on gl.current_example
      (normal mode).
    - With example=i, where i is an integer: fetch example i from the
      database, run the check and clean up, i.e. restore
      gl.current_example (debugging mode).
    - With example='all': check the hypothesis on all seen examples and
      clean up afterwards (debugging mode).

    Any other keyword arguments are ignored and None is returned.
    """
    import utils

    current_example = gl.current_example  # remember to clean up later
    if not kwargs:
        return test_default()
    if 'example' not in kwargs:
        return None

    # perform a test with a specific example (for debugging)
    i = kwargs['example']
    if utils.isint(i):
        utils.get_example(i)
        return test_default(last_seen=current_example)
    # otherwise check all seen examples for correctness (argument = 'all')
    return test_all(current_example)
def fixfootnoterefs(self):
    '''fixfootnoterefs(self): Fix the contents for a known pattern.

    Sometimes a footnote reference appears on its own line above the line
    it belongs to:

        123)
        lorem ipsum

    instead of:

        lorem ipsum123)

    This causes the footnote reference to be considered as code, and so
    is the following line.  Such code elements are merged back into the
    preceding text element and then removed.'''
    todelete = []
    for i in range(1, len(self.elements)):
        code = self.elements[i]
        if not isinstance(code, elements.Code):
            continue
        if len(code.lines) < 2:
            continue
        reference = code.lines[0]
        if not (utils.isint(reference[:-1]) and reference[-1] == ')'):
            continue
        prevtext = self.elements[i - 1]
        if not isinstance(prevtext, elements.Text):
            continue

        # Re-attach the reference behind the line it belongs to.
        prevtext.addcontent(code.lines[1] + reference.lstrip())
        for line in code.lines[2:]:
            prevtext.addcontent(line)
        todelete.append(i)

    # Delete from the back so the remaining indices stay valid.
    for i in reversed(todelete):
        del self.elements[i:i + 1]
def _group_by_surgery(X, y, fcs, feature_index, surgery_index,
                      class_of_interest, y_position, nan_position):
    """Split samples with a contribution for the feature by surgery status."""
    surgery = ([], [], [])
    no_surgery = ([], [], [])
    unknown = ([], [], [])
    for i in range(X.shape[0]):
        if feature_index not in fcs[i].keys():
            continue
        status = X[i][surgery_index]
        if status == 'S' or status == 'Y':
            xs, ys, colors = surgery
        elif utils.isnan(status):
            xs, ys, colors = unknown
        else:
            xs, ys, colors = no_surgery
        xs.append(fcs[i][feature_index][class_of_interest])
        if xs is unknown[0]:
            # Samples with unknown surgery status share one fixed position.
            ys.append(nan_position)
        else:
            ys.append(y_position(X[i][feature_index]))
        colors.append('blue' if y[i] == class_of_interest else 'red')
    return surgery, no_surgery, unknown


def _surgery_legend(coi, **marker_kwargs):
    """Draw the class-colour / surgery-marker legend on the current axes."""
    red_patch = mpatches.Patch(color='red')
    blue_patch = mpatches.Patch(color='blue')
    xmarker = mlines.Line2D([], [], color='black', marker='x',
                            markersize=10, linestyle='None',
                            **marker_kwargs)
    omarker = mlines.Line2D([], [], color='black', marker='o',
                            markersize=10, linestyle='None',
                            markerfacecolor='None',
                            markeredgecolor='black', **marker_kwargs)
    plt.legend([red_patch, blue_patch, xmarker, omarker], [
        'Classe da instância ≠ ' + coi, 'Classe da instância = ' + coi,
        'Não passou por cirurgia', 'Passou por cirurgia'
    ], numpoints=1, fontsize='small')


def plot_feature_contributions_surgery_class(X, y, feature_index, fcs, attributes, class_of_interest, title=None):
    """Scatter feature contributions against feature values by surgery status.

    For every sample ``i`` with an entry for ``feature_index`` in
    ``fcs[i]``, the contribution towards ``class_of_interest`` is plotted
    on the x axis and the feature value on the y axis.  The marker encodes
    the 'Q44071_snCplexoAt' column ('S'/'Y' -> 'o', NaN -> 'd', anything
    else -> 'x').  The colour is blue when ``y[i]`` equals
    ``class_of_interest`` and red otherwise.

    A feature whose first non-NaN value is neither int nor float is treated
    as categorical (y is the index into the distinct values); other
    features are plotted on a numeric axis with an extra 'nan' tick.

    Args:
        X: 2-D array of samples (rows) by attributes (columns).
        y: Class of every sample.
        feature_index: Column of ``X`` to plot.
        fcs: Per-sample mapping ``feature index -> class -> contribution``.
        attributes: Array of column names; must contain
            'Q44071_snCplexoAt'.
        class_of_interest: Class whose contributions are plotted.
        title: Optional output path (see the note near the end).
    """
    surgery_index = np.where(attributes == 'Q44071_snCplexoAt')[0][0]
    first_value = X[utils.firstNotNan(X[:, feature_index])][feature_index]
    coi = str(class_of_interest)

    if not utils.isint(first_value) and not utils.isfloat(first_value):
        # Categorical feature: y positions are indices into `values`, with
        # NaN taking the last slot.
        values = [i for i in set(X[:, feature_index])
                  if not utils.isnan(i)] + [np.nan]
        surgery, no_surgery, unknown = _group_by_surgery(
            X, y, fcs, feature_index, surgery_index, class_of_interest,
            values.index, len(values) - 1)
        x_surgery, y_surgery, surgery_colors = surgery
        x_no_surgery, y_no_surgery, no_surgery_colors = no_surgery
        x_nan, y_nan, nan_colors = unknown

        ax = plt.subplot(111)
        ax.scatter(x_surgery, y_surgery, marker='o', s=60,
                   edgecolors=surgery_colors, facecolors='none')
        ax.scatter(x_no_surgery, y_no_surgery, marker='x', s=60,
                   edgecolors=no_surgery_colors, facecolors='none')
        ax.scatter(x_nan, y_nan, marker='d', s=60, edgecolors=nan_colors,
                   facecolors='none')
        plt.xlabel('feature contribution')
        plt.ylabel('values of feature %r' % attributes[feature_index])
        # One empty tick below and above the value ticks.
        ax.set_yticks(np.array(range(len(values) + 2)) - 1)
        ax.set_yticklabels([str('')] + values + [str('')])
        _surgery_legend(coi)
        plt.show()
    else:
        # Numeric feature: NaN samples are placed one "step" above the
        # largest value, the step being the gap between the two largest.
        values = sorted([
            round(i, 4) for i in (set(X[:, feature_index]))
            if not utils.isnan(i)
        ])
        print(values)
        nan_index = values[-1] - values[-2]
        surgery, no_surgery, unknown = _group_by_surgery(
            X, y, fcs, feature_index, surgery_index, class_of_interest,
            lambda value: value, values[-1] + nan_index)
        x_surgery, y_surgery, surgery_colors = surgery
        x_no_surgery, y_no_surgery, no_surgery_colors = no_surgery
        x_nan, y_nan, nan_colors = unknown

        fig, ax = plt.subplots()
        ax.scatter(x_surgery, y_surgery, marker='o', s=60,
                   facecolors='none', edgecolors=surgery_colors)
        ax.scatter(x_no_surgery, y_no_surgery, marker='x', s=60,
                   edgecolors=no_surgery_colors)
        ax.scatter(x_nan, y_nan, marker='d', s=60, facecolors='none',
                   edgecolors=nan_colors)
        # Draw once so the automatic tick labels exist before reading them.
        fig.canvas.draw()
        labels = [''] + [item.get_text()
                         for item in ax.get_yticklabels()] + ['']
        if values[-1] + nan_index < ax.get_yticks()[-1]:
            plt.yticks([values[0] - nan_index] + sorted(
                list(ax.get_yticks()) + [values[-1] + nan_index]))
        else:
            plt.yticks([values[0] - nan_index] + sorted(
                list(ax.get_yticks()) +
                [values[-1] + nan_index, values[-1] + 2 * nan_index]))
        labels[-2] = 'nan'
        plt.xlabel('feature contribution')
        plt.ylabel('values of feature %r' % attributes[feature_index])
        ax.set_yticklabels(labels)
        _surgery_legend(coi, label='Bla')
        plt.show()

    if title is not None:
        plt.savefig(title)
        plt.close()
        # NOTE(review): the dump below is written to the same path as the
        # figure saved just above, so the image is overwritten by the text.
        # Kept as is because callers may rely on the text file; confirm
        # whether a separate file name was intended.
        fields = [
            ('X', X), ('y', y), ('fcs', fcs), ('features', attributes),
            ('feature_index', feature_index), ('values', values),
            ('x_surgery', x_surgery), ('y_surgery', y_surgery),
            ('surgery_colors', surgery_colors),
            ('x_no_surgery', x_no_surgery), ('y_no_surgery', y_no_surgery),
            ('no_surgery_colors', no_surgery_colors),
            ('x_nan', x_nan), ('y_nan', y_nan), ('nan_colors', nan_colors),
        ]
        # `with` closes the handle; the original left the file open.
        with open(title, 'w') as f:
            f.write('\n'.join(name + '=' + str(value)
                              for name, value in fields))
def handle_text(message):
    """Handle one incoming text message of the survey bot.

    Administrators (``uid in ADMINS``) can open a menu to list/delete or
    add questions; ``INADMINMENU[uid]`` remembers which admin screen is
    active.  For everybody, "да" starts the survey, subsequent messages are
    recorded as answers, and the collected answers are forwarded with
    ``send_to_admins`` after the last question.  "нет" prompts the user to
    use /start.

    Args:
        message: Incoming message; ``message.text`` and
            ``message.from_user.id`` are read.
    """
    uid = message.from_user.id
    if uid not in USERS:
        USERS[uid] = u.User()

    def send_admin_menu():
        # Shared by the "cancel" and "question deleted" paths.
        markup = u.get_keyboard(["Существующие вопросы", "Добавить вопрос", "Отмена"])
        bot.send_message(uid, "Меню администратора \n"
                              "(Визуальное представление меню, "
                              "логика и способы взаимодействия c ботом являются "
                              "демо-вариантами и могут быть изменены)",
                         reply_markup=markup)

    # --- administrator commands -------------------------------------------
    if message.text == "Отмена" and uid in ADMINS:
        if uid in INADMINMENU and INADMINMENU[uid] != '':
            # Leave the current admin screen and go back to the menu.
            INADMINMENU[uid] = ''
            send_admin_menu()
            return
        bot.send_message(uid, "Чтобы начать "
                              "опрос введите команду /start",
                         reply_markup=u.get_keyboard([]))
        return

    if message.text == "Существующие вопросы" and uid in ADMINS:
        msg = "Текущие вопросы в боте: \n\n"
        print(Questions)
        for i, q in enumerate(Questions):
            msg += "(№ {}) ".format(i)
            msg += '{} \n Ответы: {}\n\n'.format(q.text, ", ".join(q.answers))
        msg += "Для удаления вопроса отправьте его номер."
        INADMINMENU[uid] = "Существующие вопросы"
        markup = u.get_keyboard(["Отмена"])
        print(msg)
        bot.send_message(uid, msg, reply_markup=markup)
        return

    if message.text == "Добавить вопрос" and uid in ADMINS:
        INADMINMENU[uid] = "Добавить вопрос"
        msg = "Для добавления вопроса введите текст нового вопроса, " \
              "затем в скобках варианты через запятую, если требуется. \n\n" \
              "Пример: Введите ваш возраст (12 лет, 21 год, 45, более 50-ти)\n\n" \
              "(строгие требования к написанию вопроса относятся лишь к существующему "\
              "прототипу и в дальнейшем ввод вопросов будет упрощен)"
        markup = u.get_keyboard(["Отмена"])
        print(msg)
        bot.send_message(uid, msg, reply_markup=markup)
        return

    # --- input typed while an admin screen is active ----------------------
    if uid in INADMINMENU:
        if INADMINMENU[uid] == "Существующие вопросы":
            # Delete by position, and only when the number addresses an
            # existing question: out-of-range numbers used to raise
            # IndexError and negative ones silently removed from the end.
            if u.isint(message.text) and 0 <= int(message.text) < len(Questions):
                del Questions[int(message.text)]
                bot.send_message(uid, "Вопрос удален")
                send_admin_menu()
                return
            bot.send_message(uid, "Для удаления вопроса отправьте его номер.")
            return

        if INADMINMENU[uid] == "Добавить вопрос":
            try:
                # Expected form: "<question> (<answer>, <answer>, ...)".
                parts = message.text.split(' (')
                tq = parts[0]
                if len(parts) > 1:
                    ta_arr = parts[1][:-1].split(', ')
                    if len(ta_arr[0]) > 0:
                        Questions.append(u.Question(tq, ta_arr))
                    else:
                        Questions.append(u.Question(tq))
                else:
                    Questions.append(u.Question(tq))
                markup = u.get_keyboard(["/start"])
                bot.send_message(uid, "Вопрос добавлен", reply_markup=markup)
                INADMINMENU[uid] = ""
            except Exception:
                # Best effort: any failure is reported as a format problem.
                bot.send_message(uid, "Пожалуйста следуйте требованиям при написании вопроса "
                                      "(строгие требования относятся лишь к существующему "
                                      "прототипу и в дальнейшем ввод вопросов будет упрощен)")
            return

    # --- survey flow ------------------------------------------------------
    if message.text.lower() == "да":
        if len(Questions) > 0:
            USERS[uid].question = Questions[0]
            markup = u.get_keyboard(USERS[uid].question.answers)
            bot.send_message(uid, USERS[uid].question.text, reply_markup=markup)
        else:
            markup = u.get_keyboard([])
            bot.send_message(uid, "В боте еще не заданы вопросы", reply_markup=markup)
        # `question` always holds the NEXT question to be asked.
        if len(Questions) > 1:
            USERS[uid].question = Questions[1]
            USERS[uid].q_index = 1
        else:
            USERS[uid].is_last_quest = True
        return

    if not USERS[uid].is_last_quest and USERS[uid].question is not None:
        USERS[uid].answs.append(message.text)
        markup = u.get_keyboard(USERS[uid].question.answers)
        bot.send_message(uid, USERS[uid].question.text, reply_markup=markup)
        if len(Questions) > USERS[uid].q_index + 1:
            USERS[uid].q_index += 1
            USERS[uid].question = Questions[USERS[uid].q_index]
        else:
            USERS[uid].is_last_quest = True
        return

    if USERS[uid].is_last_quest:
        # Final answer: thank the user, forward the answers and reset.
        USERS[uid].answs.append(message.text)
        bot.send_message(uid, "Спасибо, за пройденный опрос",
                         reply_markup=u.get_keyboard([]))
        send_to_admins(USERS[uid])
        USERS[uid] = u.User()

    if message.text.lower() == "нет":
        markup = u.get_keyboard(["/start"])
        bot.send_message(uid, "Нажмите на кнопку старт чтоб начать "
                              "опрос или введите команду /start",
                         reply_markup=markup)
    return
def readData(class_name,
             class_questionnaire='Q92510',
             data_path=None,
             missing_input='none',
             dummy=False,
             transform_numeric=False,
             use_text=False,
             skip_class_questionnaire=True):
    """Read a questionnaire CSV and encode its columns as a numeric table.

    Every kept column is classified ('date', 'text', 'float', 'int' or
    'bin') from its first non-NaN value and converted into one or more
    numeric column vectors, which are assembled into the returned frame.

    Args:
        class_name: Columns whose name contains this text are always
            treated as 'int'.
        class_questionnaire: Columns whose name contains this text but not
            ``class_name`` are skipped when ``skip_class_questionnaire``
            is true.
        data_path: Path handed to ``pd.read_csv``.
        missing_input: ``'none'`` disables imputation; any other value is
            used as the imputer strategy.
        dummy: Encode non-float columns as dummy ('bin') columns.  Forced
            to False unless ``transform_numeric`` is true.
        transform_numeric: Use factorized codes for 'int' columns.
        use_text: Add bag-of-words columns for 'text' columns; otherwise
            text columns are dropped.
        skip_class_questionnaire: See ``class_questionnaire``.

    Returns:
        ``pd.DataFrame`` with one column per entry of the collected
        attribute names.
    """
    # attributes are separated by commas (',')
    # "nan" is assigned to fields with 'N/A' or 'None'
    print('Reading data...')
    data = pd.read_csv(data_path,
                       header=0,
                       delimiter=",",
                       na_values=['N/A', 'None', 'nan', 'NAAI', 'NINA'],
                       quoting=0,
                       encoding='utf8')
    if (not transform_numeric):
        dummy = False

    # Drop bookkeeping columns, selected by the suffix of their name.
    # NOTE(review): axis is passed positionally; newer pandas releases may
    # require `axis=1` - confirm against the pinned version.
    data = data.drop(data.columns[data.columns.str.endswith('id')], 1)
    data = data.drop(data.columns[data.columns.str.endswith('token')], 1)
    data = (data.drop(data.columns[data.columns.str.endswith('ipaddr')], 1))
    data = (data.drop(data.columns[data.columns.str.endswith('date')], 1))
    data = (data.drop(data.columns[data.columns.str.endswith('stamp')], 1))
    data = (data.drop(data.columns[data.columns.str.endswith('datLesao')], 1))
    data = (data.drop(data.columns[data.columns.str.endswith('datNasc')], 1))

    n_samples = data.shape[0]
    n_features = data.shape[1]
    # Matches "YYYY-MM-DD" optionally followed by "HH:MM:SS".
    regex_date = re.compile(
        '(\d{4})-(\d{2})-(\d{2})\s?((\d{2}):(\d{2}):(\d{2}))?')
    # treatment[k] is the kind chosen for the k-th column ('U5' holds up to
    # five characters, enough for the longest kind, 'float').
    treatment = np.empty(n_features, dtype='U5')
    attributes = []
    categories = []
    transformedData = []
    index = 0
    si = 0
    print('Transforming data...')
    # representing the categories with numbers
    for attribute in data.columns:
        if skip_class_questionnaire and class_questionnaire is not None and class_questionnaire in attribute and class_name not in attribute:
            index += 1
            continue
        # t[0]: integer codes for the column, t[1]: the distinct values.
        t = pd.factorize(data[attribute].values, sort=True)
        i = utils.firstNotNan(data[attribute].values)
        try:
            # regex_date.match raises TypeError for non-string values; that
            # case is handled in the except branch below.
            result = regex_date.match(data[attribute].values[i])
            if (result):
                treatment[index] = 'date'
            elif (len(t[1]) > 0.9 * n_samples and len(t[1][0]) > 50):
                # Mostly distinct values with a long first value: free text.
                treatment[index] = 'text'
            else:
                if (utils.isfloat(data[attribute].values[i])):
                    treatment[index] = 'float'
                elif (not dummy):
                    if (transform_numeric
                            or utils.isint(data[attribute].values[i])):
                        temp = t[0]
                    else:
                        temp = data[attribute].values
                    treatment[index] = 'int'
                else:
                    treatment[index] = 'bin'
        except TypeError:
            if (utils.isfloat(data[attribute].values[i])):
                temp = np.array(data[attribute].values).reshape(-1, 1)
                treatment[index] = 'float'
            elif (utils.isint(data[attribute][i])):
                temp = (np.array(data[attribute].values) * 1).reshape(-1, 1)
                treatment[index] = 'int'
            else:
                print("could not identify type of attribute %s" % attribute)
                exit(-1)
        # treatment of class attribute
        if (class_name in attribute):
            temp = t[0]
            treatment[index] = 'int'
        if (treatment[index] == 'float'):
            if (missing_input != 'none'):
                # NOTE(review): preprocessing.Imputer looks like the legacy
                # scikit-learn imputer - confirm it exists in the pinned
                # version.
                imp = preprocessing.Imputer(strategy=missing_input, axis=0)
                temp = imp.fit_transform(
                    X=np.array(data[attribute].values).reshape(-1, 1))
            else:
                temp = data[attribute].values
            transformedData.append(
                np.array(list((float(x) for x in temp))).reshape(-1, 1))
        else:
            # t[0] corresponds to the translated numeric data
            # t[1] corresponds to a list with the possible values for each
            # feature (different values in a column, e.g. [sim, não]).
            # The index of a value in that list is its numeric
            # representation (e.g. sim -> 0, não -> 1).
            if (treatment[index] == 'bin'):
                # One output column per distinct value, named
                # "<attribute>=<value>".
                temp = pd.get_dummies(np.ravel(data[attribute].values))
                for x in temp.columns:
                    attributes.append(attribute + '=' + x)
                    transformedData.append(temp[x].reshape(-1, 1))
            elif (treatment[index] == 'int'):
                if (not transform_numeric):
                    # Keep the raw values, upper-casing strings in place.
                    temp = data[attribute].values
                    for temp_index in range(len(temp)):
                        if (isinstance(temp[temp_index], str)):
                            temp[temp_index] = temp[temp_index].upper()
                i = utils.firstNotNan(data[attribute].values)
                if (utils.isint(data[attribute].values[i])
                        and missing_input != 'none'):
                    # Mark missing entries with -1 so the imputer can
                    # replace them.
                    temp[data[attribute].values == 'NAAI'] = -1
                    temp[np.isnan(
                        np.array(data[attribute].values, dtype=float))] = -1
                    imp = preprocessing.Imputer(missing_values=-1,
                                                strategy=missing_input,
                                                axis=0)
                    temp = imp.fit_transform(
                        X=np.array(list(int(x) for x in temp)).reshape(-1, 1))
                elif (missing_input != 'none'):
                    imp = preprocessing.Imputer(missing_values=np.nan,
                                                strategy=missing_input,
                                                axis=0)
                    temp = imp.fit_transform(X=np.array(temp).reshape(-1, 1))
                transformedData.append(np.array(temp).reshape(-1, 1))
            elif (treatment[index] == 'date'):
                # Dates are reduced to their year; float entries (NaN)
                # become -1.
                temp = []
                for date in data[attribute].values:
                    if (not isinstance(date, float)):
                        temp.append(int(date[:4]))
                    else:
                        temp.append(-1)
                if (missing_input != 'none'):
                    imp = preprocessing.Imputer(strategy='most_frequent',
                                                axis=0)
                    temp = imp.fit_transform(X=np.array(temp).reshape(-1, 1))
                transformedData.append(np.array(temp).reshape(-1, 1))
            elif (use_text and treatment[index] == 'text'):
                bigword = ''
                try:
                    # One output column per n-gram (1 to 4 words) kept by
                    # the vectorizer, named "<attribute> termo: <n-gram>".
                    bag_of_words = CountVectorizer(min_df=0.25,
                                                   stop_words=sw,
                                                   ngram_range=(1, 4))
                    words = np.array(
                        bag_of_words.fit_transform(
                            ((data[attribute].values))).todense())
                    c = 0
                    for word in bag_of_words.get_feature_names():
                        bigword = bigword + word + ' '
                        attributes.append(attribute + ' termo: ' + word)
                        transformedData.append(words[:, c].reshape(-1, 1))
                        c += 1
                except (ValueError, AttributeError):
                    # The column could not be vectorized: skip it.
                    index += 1
                    continue
            else:
                # Text column with use_text disabled: skip it.
                index += 1
                continue
        categories.append(t[1])
        # 'text' and 'bin' columns registered their own attribute names.
        if (treatment[index] != 'text' and treatment[index] != 'bin'):
            attributes.append(attribute)
        index += 1
    # Stack the per-column vectors into a (n_samples, n_columns) table.
    data = np.array(transformedData).reshape(-1, n_samples).T
    data = pd.DataFrame(data, columns=attributes)
    return data
def parseUCSCHeader(header, header_prefix='>', retainKeys=True, toInt=True):
    '''
    Parse a UCSC Table Browser FASTA header.

    Example:
      hg38_knownGene_ENST00000376838.5_0 range=chr1:11130526-11131568 5'pad=10 3'pad=3 strand=- repeatMasking=none

    Args
    - header: str
        FASTA header line
    - header_prefix: str. default='>'
        FASTA header line prefix, removed when present
    - retainKeys: bool. default=True
        Retain the original FASTA header keys, i.e. 5'pad and 3'pad,
        instead of the valid Python identifiers pad5 and pad3.
    - toInt: bool. default=True
        Where possible, convert string values to int. May impact
        performance.

    Returns: dict
    - Map of sequence metadata, every value stripped of whitespace.
    - Keys
      - name: sequence name
      - chrom: chromosome (chr#)
      - chromStart: start coordinate (browser format: 1-based start and end)
      - chromEnd: end coordinate (browser format: 1-based start and end)
      - 5'pad: extra bases at the 5' end of the feature
      - 3'pad: extra bases at the 3' end of the feature
      - strand: +/-
      - repeatMasking: mask repeats
        - none: no repeat masking
        - N: repeats are masked to N's
        - lower: repeats are masked to lower case
    - See https://genomebrowser.wustl.edu/goldenPath/help/hgTextHelp.html#FASTA
      for an older description of the FASTA header.
    - In the Table Browser, these options are specified after clicking
      'get output'.
    '''
    def clean(text):
        # Strip first; int conversion is attempted only when requested.
        text = text.strip()
        return int(text) if toInt and utils.isint(text) else text

    header = header.strip()
    if header.startswith(header_prefix):
        header = header[len(header_prefix):]

    data = FASTA_HEADER_REGEX_UCSC.match(header).groupdict()
    if retainKeys:
        for original_key, group_name in (('5\'pad', 'pad5'), ('3\'pad', 'pad3')):
            data[original_key] = data.pop(group_name)
    return {key: clean(value) for key, value in data.items()}
def _collect_feature_contributions(X, fcs, feature_index, class_of_interest,
                                   y_position, contribution_key):
    """Split contributions by sign and group them by feature value."""
    positive = ([], [])
    negative = ([], [])
    zero = ([], [])
    contributions = {}
    for i in range(X.shape[0]):
        if feature_index not in fcs[i].keys():
            continue
        contribution = fcs[i][feature_index][class_of_interest]
        value = X[i][feature_index]
        if contribution > 0:
            positive[0].append(contribution)
            positive[1].append(y_position(value))
        elif contribution == 0:
            zero[0].append(0)
            zero[1].append(y_position(value))
        else:
            negative[0].append(contribution)
            negative[1].append(y_position(value))
        contributions.setdefault(contribution_key(value),
                                 []).append(contribution)
    return positive, negative, zero, contributions


def plot_feature_contributions(X, feature_index, fcs, attributes, class_of_interest, title=None):
    """Plot the contributions of one feature against its values.

    For every sample ``i`` with an entry for ``feature_index`` in
    ``fcs[i]``, the contribution towards ``class_of_interest`` is drawn on
    the x axis (blue: positive, red: negative, black: zero) and the feature
    value on the y axis.  The mean and variance of the contributions are
    printed per feature value and handed to ``boxplot``.

    A feature whose first non-NaN value is neither int nor float is treated
    as categorical (y is the index into the distinct values); other
    features use a numeric axis with an extra 'nan' tick.

    Args:
        X: 2-D array of samples (rows) by attributes (columns).
        feature_index: Column of ``X`` to plot.
        fcs: Per-sample mapping ``feature index -> class -> contribution``.
        attributes: Column names, used for the axis label.
        class_of_interest: Class whose contributions are plotted.
        title: Optional path; when given the current figure is saved there
            and closed.
    """
    first_value = X[utils.firstNotNan(X[:, feature_index])][feature_index]

    if not utils.isint(first_value) and not utils.isfloat(first_value):
        values = [i for i in set(X[:, feature_index])
                  if not utils.isnan(i)] + [np.nan]

        def y_position(value):
            # NaN is never found by list.index (nan != nan), so it is
            # mapped explicitly to the last slot.
            if utils.isnan(value):
                return len(values) - 1
            return values.index(value)

        positive, negative, zero, contributions = \
            _collect_feature_contributions(
                X, fcs, feature_index, class_of_interest, y_position,
                lambda value: value)
        pos_fcs, pos_values = positive
        neg_fcs, neg_values = negative
        zero_fcs, zero_values = zero

        print('Contributions:')
        for value in contributions.keys():
            print('Value %r' % value)
            print('\nMean: %r Variance: %r' %
                  (np.mean(contributions[value]),
                   np.var(contributions[value])))
        c = contributions.items()
        boxplot([a[1] for a in c], [a[0] for a in c], title=None)

        ax = plt.subplot(111)
        plt.plot(pos_fcs, pos_values, 'x', color='blue')
        plt.plot(neg_fcs, neg_values, 'x', color='red')
        plt.plot(zero_fcs, zero_values, 'x', color='black')
        plt.xlabel('feature contribution')
        plt.ylabel('values of feature %r' % attributes[feature_index])
        # One empty tick below and above the value ticks.
        ax.set_yticks(np.array(range(len(values) + 2)) - 1)
        ax.set_yticklabels([str('')] + values + [str('')])
        plt.show()
    else:
        values = sorted([
            round(i, 4) for i in (set(X[:, feature_index]))
            if not utils.isnan(i)
        ])
        # NaN samples are placed one "step" above the largest value, the
        # step being the gap between the two largest values.
        nan_index = values[-1] - values[-2]

        def y_position(value):
            if utils.isnan(value):
                return values[-1] + nan_index
            return value

        def contribution_key(value):
            # All NaN samples share the single key 'nan'.
            return 'nan' if utils.isnan(value) else value

        positive, negative, zero, contributions = \
            _collect_feature_contributions(
                X, fcs, feature_index, class_of_interest, y_position,
                contribution_key)
        pos_fcs, pos_values = positive
        neg_fcs, neg_values = negative
        zero_fcs, zero_values = zero

        print('Contributions:')
        for value in contributions.keys():
            print('Value %r' % value)
            # np.var, to match the printed label and the categorical
            # branch; this used to print the standard deviation.
            print('Mean: %r Variance: %r' %
                  (np.mean(contributions[value]),
                   np.var(contributions[value])))
        c = contributions.items()
        boxplot([a[1] for a in c], [a[0] for a in c], title=None)

        fig, ax = plt.subplots()
        plt.plot(pos_fcs, pos_values, 'x', color='blue')
        plt.plot(neg_fcs, neg_values, 'x', color='red')
        plt.plot(zero_fcs, zero_values, 'x', color='black')
        # Draw once so the automatic tick labels exist before reading them.
        fig.canvas.draw()
        labels = [''] + [item.get_text()
                         for item in ax.get_yticklabels()] + ['']
        if values[-1] + nan_index < ax.get_yticks()[-1]:
            plt.yticks([values[0] - nan_index] + sorted(
                list(ax.get_yticks()) + [values[-1] + nan_index]))
        else:
            plt.yticks([values[0] - nan_index] + sorted(
                list(ax.get_yticks()) +
                [values[-1] + nan_index, values[-1] + 2 * nan_index]))
        labels[-2] = 'nan'
        plt.xlabel('feature contribution')
        plt.ylabel('values of feature %r' % attributes[feature_index])
        ax.set_yticklabels(labels)
        plt.show()

    if title is not None:
        plt.savefig(title)
        plt.close()
def _parselinewithoutindent(self, line: str, tocmatcher) -> None:
    """Classify ``line`` and record it in ``self.elements``.

    The checks run in a fixed order and the first one that matches wins:

    1. a "Forward references: " line becomes its own paragraph;
    2. a title or heading recognised by ``tocmatcher``;
    3. an unordered ("—" / "•") or ordered ("<digits>.") list item;
    4. a value definition (number, ``__MACRO__`` or a two-column layout);
    5. indented text: continuation of the previous element, a paragraph
       nested in a list, or a code block;
    6. otherwise a continuation of the previous text element, or a new
       paragraph.

    ``self._inelement`` is set to False after a title/heading and to True
    after every other newly appended element; continuation lines leave it
    untouched.

    Args:
        line: the raw text line. It is expected to contain at least one
            non-whitespace character: ``splits[0]`` is indexed without a
            guard, so a blank line may raise ``IndexError``.
        tocmatcher: object providing ``matchtitle(line)`` and
            ``matchheading(line)``.
    """
    splits = line.split(maxsplit=1)
    groups = utils.groupwords(line)
    previous = self.elements[-1] if self.elements else None

    if line.lstrip().startswith("Forward references: "):
        # make it its own paragraph
        self.elements.append(elements.Paragraph(line))
        self._inelement = True
        return

    if tocmatcher.matchtitle(line):
        if (re.match(toc.KEYREGEX, splits[0])
                or re.match(fr"{toc.CHAPTERREGEX}\.", splits[0])):
            # numbered title
            self.elements.append(elements.NumberedTitleHeading(line))
        else:
            # title
            self.elements.append(elements.TitleHeading(line))
        self._inelement = False
        return

    if tocmatcher.matchheading(line):
        if len(splits) == 2:
            # numbered title
            self.elements.append(elements.NumberedTitleHeading(line))
        else:
            # heading
            self.elements.append(elements.NumberedHeading(line))
        self._inelement = False
        return

    if splits[0][0] in ("—", '•'):
        # list element
        self.elements.append(elements.UnorderedListItem(line))
        self._inelement = True
        return

    if splits[0][:-1].isdigit() and splits[0][-1] == '.':
        # ordered list
        self.elements.append(elements.OrderedListItem(line))
        self._inelement = True
        return

    if len(splits) == 2:
        # Slice instead of index: a two-word line can be only three
        # characters long ("a b"), where line[3] would raise IndexError.
        # An empty slice is simply "not whitespace".
        if line[3:4].isspace():
            # maybe value definition with number as value
            if utils.isint(line[:3]):
                # value definition of number
                self.elements.append(elements.ValueDefinition(*splits))
                self._inelement = True
                return
            if line[0] == '−' and utils.isint(line[1:3]):
                # value definition of number, but with a misunderstood −
                # U+2212 MINUS SIGN instead of a - U+002D HYPHEN-MINUS
                self.elements.append(
                    elements.ValueDefinition(splits[0].replace('−', '-'),
                                             splits[1]))
                self._inelement = True
                return

        if splits[0].startswith("__") and splits[0].endswith("__"):
            # value definition of preprocessing macro
            self.elements.append(elements.ValueDefinition(*splits))
            self._inelement = True
            return

        if (not line[:2].isspace()  # no or little indent
                and len(groups) == 2):  # only 2 groups
            stripped = line.lstrip()  # remove any indentation
            keylen = len(splits[0])
            # if the second split starts between the 12th or 15th column,
            # and there are at least 4 spaces between splits
            if (12 <= stripped.find(splits[1], keylen) <= 15
                    and stripped[keylen:keylen + 4].isspace()):
                # value definition of anything
                self.elements.append(elements.ValueDefinition(*splits))
                self._inelement = True
                return

    if line[:4].isspace():
        # indented text?
        if self._inelement:
            # maybe it's part of the previous element ?
            def maybepreviouselement(previouselement, line):
                if isinstance(previouselement, elements.Code):
                    return True
                if isinstance(previouselement, elements.ValueDefinition):
                    return True
                if isinstance(previouselement, elements.OrderedListItem):
                    return True
                if isinstance(previouselement, elements.UnorderedListItem):
                    # continuation text must sit exactly two columns
                    # deeper than the list item's own indent
                    indent = previouselement.indent + 2
                    if (line[:indent].isspace()
                            and not line[indent].isspace()):
                        return True
                return False

            if maybepreviouselement(previous, line):
                previous.addcontent(line)
                return

        if (self.elements
                and isinstance(previous, elements.UnorderedListItem)
                and previous.level > 1):
            # indented paragraph, inside a list
            self.elements.append(elements.Paragraph(line))
            self._inelement = True
            return

        if line[:7].isspace():
            # code block
            # we already checked for _inelement
            self.elements.append(elements.Code(line))
            self._inelement = True
            return
        # idk, make it a paragraph

    # regular text
    if (self._inelement and isinstance(previous, elements.Text)
            and not isinstance(previous, elements.ValueDefinition)):
        # continuation of previous text element
        previous.addcontent(line)
        return

    # new paragraph
    self.elements.append(elements.Paragraph(line))
    self._inelement = True
def encode(s, **kwargs):
    """Return the binary digits of ``s`` as a string without the ``0b`` prefix.

    A value starting with ``'0x'`` is parsed as a hexadecimal integer.
    Anything that ``isint`` then rejects is encoded to bytes and converted
    with ``bytes_to_long``. ``kwargs`` is accepted but not used here.
    """
    value = int(s[2:], 16) if s.startswith('0x') else s

    if isint(value):
        number = value
    else:
        # not an integer: go through the byte representation of the text
        number = bytes_to_long(value.encode())

    # bin() yields '0b...'; drop the first two characters
    return bin(number)[2:]
import decisionTree as dt
import randomForest as rf
import numpy as np
import pandas as pd
import read
import utils
import math
from sklearn.metrics import accuracy_score

# Script-level switches; presumably consumed further down the script.
dummy = False
transform = False
use_text = False

print('Testing utils...')

# utils.isint: ints, integer-looking strings and bools are accepted ...
assert all(utils.isint(sample) for sample in (10, '50', '-999'))
# ... floats and float-looking strings are not ...
assert not any(utils.isint(sample) for sample in (1.0, '50.0'))
assert utils.isint(True)
# ... and neither is non-numeric text.
assert not any(utils.isint(sample) for sample in ('aba', 'a?a', '49.x'))

# utils.isfloat: only strings with a valid fractional form are accepted.
assert not utils.isfloat('0.x')
assert all(utils.isfloat(sample) for sample in ('0.0', '12.984', '-0.4'))
assert not utils.isfloat('9')

original_attributes = np.array([
    'Outlook',
    'Temp',
    'Humidity',
    'Windy?',
    'Class',
])