def shannon(probabilities):
    try:
        answer = {}
        answer['idx_to_bin'] = {}
        probabilities.sort(reverse=True)
        for i in range(len(probabilities)):
            answer['idx_to_bin'][i] = ''
        utils.separate(probabilities, answer['idx_to_bin'])
    except Exception as e:
        return e
    return answer
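# A minimal sketch of what utils.separate is assumed to do in shannon()
# above (hypothetical; the real helper is not shown): recursively split
# the descending-sorted probabilities where the two halves' total mass is
# most balanced, appending '0'/'1' to each side's codes, Shannon-Fano style.
def separate_sketch(probs, codes, lo=0, hi=None):
    if hi is None:
        hi = len(probs)
    if hi - lo <= 1:
        return
    total = sum(probs[lo:hi])
    # Pick the split point that best balances the mass of the two halves
    acc, split, best_diff = 0.0, lo + 1, float('inf')
    for i in range(lo, hi - 1):
        acc += probs[i]
        diff = abs(total - 2 * acc)
        if diff < best_diff:
            best_diff, split = diff, i + 1
    for i in range(lo, hi):
        codes[i] += '0' if i < split else '1'
    separate_sketch(probs, codes, lo, split)
    separate_sketch(probs, codes, split, hi)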
def run(self, log: str, lower_loss=17000):
    with open(log, "a+") as f:
        # Write the log header
        f.write("\n{}\n".format(
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
        f.write(">>> The Size of Input Images: {}\n".format(cfg.SIZE))
        # Train
        for epoch in range(cfg.EPOCH):
            f.write(">>> epoch: {}\n".format(epoch))
            self.net.train()
            loss_list = []
            for i, (x, t) in enumerate(self.train):
                x, t = x.to(self.device), (utils.separate(
                    t, cfg.CLS_NUM)).to(self.device)
                output = self.net(x)
                loss = self.loss(output, t)
                # Backward
                self.opt.zero_grad()
                loss.backward()
                self.opt.step()
                loss_list.append(loss.item())
                print("epoch >>> {} >>> {}/{}".format(
                    epoch, i, len(self.train)))
            loss_mean = sum(loss_list) / len(loss_list)
            f.write(">>> Loss: {}\n".format(loss_mean))
            # Save whenever the mean loss improves on the best so far
            if loss_mean < lower_loss:
                lower_loss = loss_mean
                f.write(">>> SAVE COMPLETE! LOWER_LOSS - {}\n".format(
                    lower_loss))
                torch.save(self.net.state_dict(), cfg.PARAMS)
            f.flush()
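# Hypothetical reading of utils.separate(t, cfg.CLS_NUM) in run() above:
# it appears to expand integer class labels into one-hot targets for the
# loss. A minimal stand-in, assuming t is a LongTensor of class indices:
import torch
import torch.nn.functional as F

def separate(t, cls_num):
    """One-hot encode class indices into a float tensor of shape (..., cls_num)."""
    return F.one_hot(t, num_classes=cls_num).float()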
def meta_prep(cls, target, transformer='gray', verbose=0):
    print(
        '=========================== RUNNING THE PIPELINE RECOMMENDATION ===========================\n'
    )
    if verbose > 0:
        print('Checking the data to see which case fits best...')
    total_null = cls.dataframe.isna().sum().sum()
    num_columns, cat_columns, _, _, _ = separate(cls.dataframe, target)
    if cat_columns is None:
        cat_columns = 0
    else:
        cat_null = cat_columns.isna().sum().sum()
        cat_columns = 1
    if num_columns is None:
        num_columns = 0
    else:
        num_null = num_columns.isna().sum().sum()
        num_columns = 1
    if (cat_columns == 0) and (total_null == 0):
        case = 'metamodels/metamodel_case1.pickle'
        if verbose > 0:
            print('Case 1: Dataset without categorical data and null values.')
def hist(X, Y, bins=20):
    i_figure = 1
    clusters = utils.separate(X, Y)
    # Generate a histogram for each feature
    for f in list(X.columns):
        # Discover the number of clusters from the label vector Y
        k = np.unique(Y).size
        # Feature values split by cluster
        clusters_f = []
        for i in range(0, k):
            clusters_f.append(clusters[i][f])
        # Create separate figures
        plt.figure(i_figure)
        i_figure += 1
        # Stack one bar per cluster in each bin
        plt.hist(clusters_f, histtype='barstacked', color=colors[0:k],
                 bins=bins)
        plt.xlabel(f)
        plt.ylabel('Occurrences of ranges')
        plt.title('Histogram of %s' % f)
    # Show all the figures at once
    plt.show()
def recursiveCluster(X, size):
    Y = KMeans(n_clusters=2, random_state=42).fit_predict(X.values)
    clusters = utils.separate(X, Y)
    hierarchy = [Y]
    for c in clusters:
        if len(c) > size * 2:
            hierarchy.append(recursiveCluster(c, size))
        else:
            hierarchy.append(len(c))
    return hierarchy
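# Hypothetical sketch of utils.separate(X, Y) as used by hist() and
# recursiveCluster() above: group the rows of the DataFrame X by their
# cluster label in Y, one sub-frame per label.
import numpy as np

def separate(X, Y):
    labels = np.asarray(Y)
    return [X[labels == k] for k in np.unique(labels)]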
async def emojis(self, ctx):
    """get a list of emojis from the guild in context"""
    lis = separate(ctx.guild.emojis, 64)
    if not lis:
        return await ctx.reply("This guild has no emojis.")
    for i in lis:
        embed = discord.Embed(description="".join(list(map(str, i))))
        await ctx.send(embed=embed)
        await asyncio.sleep(0.51)
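# Hypothetical sketch of the chunking helper the Discord commands assume
# (emojis above, and listbans/rolestats below, all page their output
# through it): split a sequence into chunks of at most `size` items.
def separate(seq, size):
    seq = list(seq)
    return [seq[i:i + size] for i in range(0, len(seq), size)]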
def auto_prep(cls, target):
    total_null = cls.dataframe.isna().sum().sum()
    num_columns, cat_columns, _, _, _ = separate(cls.dataframe, target)
    # Default the per-type null counts so the branches below can never hit
    # a NameError when one column type is absent
    num_null = cat_null = 0
    if cat_columns is None:
        cat_columns = 0
    else:
        cat_null = cat_columns.isna().sum().sum()
        cat_columns = 1
    if num_columns is None:
        num_columns = 0
    else:
        num_null = num_columns.isna().sum().sum()
        num_columns = 1
    if (cat_columns == 0) and (total_null == 0):
        print('Dataset without categorical data and null values.')
        pipeline = ['no_preparation']
        print('No data preparation needed.', '\n')
    elif (cat_columns == 0) and (num_null > 0):
        print('Dataset without categorical data but with null values in your set.')
        pipeline = ['imputation_median']
        print('Applied techniques: ', pipeline, '\n')
    elif (((cat_columns == 1) and (total_null == 0))
          or ((num_columns == 0) and (total_null == 0))):
        print('Dataset without null values but with categorical data in your set.')
        pipeline = ['label_encoder']
        print('Applied techniques: ', pipeline, '\n')
    elif (((cat_columns == 1) and (cat_null > 0))
          or ((num_columns == 0) and (cat_null > 0))):
        print('Dataset with categorical data and null values.')
        pipeline = ['imputation_deletion_case', 'label_encoder', 'oversampling']
        print('Applied techniques: ', pipeline, '\n')
    else:
        print('Dataset with categorical data but with null values only in numeric columns.')
        pipeline = ['imputation_median', 'label_encoder']
        print('Applied techniques: ', pipeline, '\n')
    for stage in pipeline:
        if stage in cls.PIPELINE_OPTIONS:
            print("Stage --> ", stage)
            cls.dataframe = cls.PIPELINE_OPTIONS[stage](cls.dataframe, target)
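# Hypothetical sketch of the separate() helper that meta_prep and auto_prep
# above assume: split the feature columns of the DataFrame by dtype and
# return the numeric and categorical sub-frames (None when empty); the
# three trailing return values are unused here and stubbed out.
import pandas as pd

def separate(dataframe, target):
    features = dataframe.drop(columns=[target])
    num = features.select_dtypes(include='number')
    cat = features.select_dtypes(exclude='number')
    return (num if not num.empty else None,
            cat if not cat.empty else None,
            None, None, None)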
def analyzingSentence(sentence):
    vectorSentimentValues = pd.DataFrame(columns=dbFeatures)
    arrWords = {}
    arrWords["content"] = {}
    arrWords["score"] = {}
    vectorSentimentValues.loc[0] = [0] * len(dbFeatures)
    # loop1: separate the sentence by "." delimiters
    for _sentence1 in ut.separate(sentence):
        # loop2: separate sentence1 by feature
        for _sentence2 in seperateSentenceByFeatures(_sentence1):
            # find the feature in sentence2, which is a small part of sentence1
            features = findFeatureInSentence(_sentence2)
            # print(_sentence2, "[features] : ", features)
            if len(features) == 1:
                # take the feature from the features list
                _feature = features[0]
                # look up corpus words related to the feature; the matched
                # content is kept alongside the scores to help debugging
                wordsValue = findValuableWordsOfFeature(_sentence2, _feature)[0]
                content = findValuableWordsOfFeature(_sentence2, _feature)[1]
                # final value of the words relevant to the feature
                finalValue = 0
                if len(wordsValue) == 1:
                    # take the single word's value
                    finalValue = wordsValue[0]
                elif len(wordsValue) > 1:
                    # take the value with the largest magnitude (ties favor
                    # the negative one) when a feature has several words
                    for x in wordsValue:
                        if (abs(x) > abs(finalValue)
                                or (abs(x) == abs(finalValue) and x < 0)):
                            finalValue = x
                arrWords["content"][_feature] = content
                # arrWords["score"][_feature] = wordsValue
                arrWords["score"][_feature] = finalValue
                # print('-', vectorSentimentValues[_feature].any(), type(vectorSentimentValues[_feature].any()))
                curVal = vectorSentimentValues[_feature].any()
                if (abs(finalValue) > abs(curVal)
                        or (abs(curVal) == abs(finalValue) and finalValue < 0)):
                    vectorSentimentValues[_feature] = finalValue
                    # vectorSentimentValues[_feature] += finalValue
    return [vectorSentimentValues, arrWords]
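# Hypothetical sketch of ut.separate(sentence), per the "loop1" comment in
# analyzingSentence() above: split a sentence on "." delimiters into
# sub-sentences, dropping empty parts.
def separate(sentence):
    return [part.strip() for part in sentence.split('.') if part.strip()]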
def infer_frags2(reads, mfl, extension=2):
    """Infer fragments as posterior hypotheses in a Bayesian setting:
    P(frags|reads) = P(reads|frags)P(frags)/P(reads)
                   = 1 * (prod_i lambda^2 (1 - lambda)^(r_i - l_i)) / P(reads)"""
    lamb = 1/float(mfl)
    fwds, bcks = separate(lambda (strand, start, stop): strand == '+', reads)
    fragments = []
    alpha = sum(min_seq_length/float(min_seq_length + ell)*lamb*exp(-lamb*ell)
                for ell in xrange(100000))
    alpha = 0.5  # NOTE: overrides the alpha computed just above
    for (strand, start, stop) in reads:
        ext_length = sample_ext_length(lamb, min_seq_length, alpha)
        if strand == '+':
            frag = (start, stop + ext_length)
        else:
            frag = (start - ext_length, stop)
        fragments.append(frag)
    return fragments
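# Hypothetical sketch of the separate() helper used throughout the
# fragment-inference code (infer_frags2 above; sanity_check, infer_frags,
# and recovery below): partition a list by a predicate, like Haskell's
# partition.
def separate(pred, xs):
    yes = [x for x in xs if pred(x)]
    no = [x for x in xs if not pred(x)]
    return yes, no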
def sanity_check():
    G = 10000
    config = [G/2]
    mfl = 250
    lamb = 1.0/mfl
    num_frags = 10000
    frags = concat([chip(G, config, mfl) for i in xrange(num_frags)])
    min_seq_length = 75
    sequenced_frags = filter(lambda (start, stop): stop - start > min_seq_length, frags)
    fd_frags, bk_frags = separate(lambda x: random.random() < 0.5, sequenced_frags)
    fd_reads = [('+', start, start + min_seq_length) for (start, stop) in fd_frags]
    bk_reads = [('-', stop - min_seq_length, stop) for (start, stop) in bk_frags]
    reads = fd_reads + bk_reads
    inferred_frags = exp_reconstruction(reads, lamb, G)
    plot_reads(reads, G=G)
    plt.plot(frag_density(frags, G=G), label="all frags")
    plt.plot(frag_density(sequenced_frags, G=G), label="seq frags")
    plt.plot((inferred_frags), label="inferred frags")
    plt.legend()
def infer_frags(reads, N, mfl=250.0*2, return_acceptance_rate=False):
    mfl = float(mfl)
    fwds, bcks = separate(lambda (strand, start, stop): strand == '+', reads)
    fragments = []
    attempts = 0
    while len(fragments) < N:
        _, fwd_start, fwd_stop = random.choice(fwds)
        _, bck_start, bck_stop = random.choice(bcks)
        distance = bck_start - fwd_stop
        attempts += 1
        if distance < 0:
            continue
        accept_p = exp(-distance/mfl)
        if random.random() < accept_p:
            fragments.append((fwd_start, bck_stop))
            if True:  # len(fragments) % 5 == 0 and len(fragments) > 0:
                print "accepted:", -fwd_start + bck_stop, len(fragments), len(fragments)/float(attempts)
    if return_acceptance_rate:
        return N/float(attempts)
    else:
        return fragments
def recovery():
    G = 10000
    config = [G/2]
    mfl = 250
    lamb = 1/float(mfl)
    num_frags = 1000
    frags = concat([chip(G, config, mfl) for i in xrange(num_frags)])
    min_seq_length = 75
    sequenced_frags = filter(lambda (start, stop): stop - start > min_seq_length, frags)
    fd_frags, bk_frags = separate(lambda x: random.random() < 0.5, sequenced_frags)
    fd_reads = [('+', start, start + 75) for (start, stop) in fd_frags]
    bk_reads = [('-', stop - 75, stop) for (start, stop) in bk_frags]
    reads = fd_reads + bk_reads
    hyp0 = [int(random.random() < 0.5) for i in range(G)]
    def f(hyp):
        return log_likelihood(reads, hyp, lamb, G)
    def prop(hyp):
        i = random.randrange(G)
        hyp_copy = hyp[:]
        hyp_copy[i] = 1 - hyp_copy[i]
        return hyp_copy
    chain = mh(f, prop, hyp0, use_log=True, verbose=True)
async def listbans(self, ctx, mod: discord.User = None):
    """list bans a given user has made"""
    mod = mod or ctx.author
    bans = await ctx.guild.audit_logs(
        user=mod, action=discord.AuditLogAction.ban).flatten()
    bans = utils.separate(bans, 20)
    if not bans:
        return await ctx.reply(f"There are no bans by **{mod}**.")
    for i, b_list in enumerate(bans):
        message = "".join(
            f"{n + 1}: {b.target} - {utils.dt_format(b.created_at)}\n"
            for n, b in enumerate(b_list)  # fancify it uwu
        )
        embed = discord.Embed(description=f"```yaml\n{message}\n```")
        if i == 0:
            embed.title = f"Bans by {mod}"  # set first message to have title
        await ctx.send(embed=embed)
        await asyncio.sleep(0.51)
async def rolestats(self, ctx):
    """get simple stats on all roles in server"""
    roles = ctx.guild.roles
    role_groups = separate(roles, 30)
    for i, rg in enumerate(role_groups):
        pt = PT()
        pt.field_names = ["role", "members", "%"]  # add field names for rows
        pt.align = "l"  # left align PrettyTable
        embed = discord.Embed()
        for role in rg:
            # keep role name under 15 chars
            name = role.name[:15] + ("..." if len(role.name) > 15 else "")
            members = len(role.members)  # get role member count
            # members : server members ratio
            percentage = len(role.members) / len(ctx.guild.members) * 100
            pt.add_row([name, members, f"{percentage:.2f}"])  # add to PT
        if i == 0:
            embed.title = "Role stats"
        embed.description = f"```\n{pt}\n```"
        await ctx.send(embed=embed)
        await asyncio.sleep(0.51)
def log(self):
    # TODO(fix): maintain order
    msg = utils.separate([self.ip, str(self.port), self.host, self.path,
                          str(utils.headers), '\n',
                          str(self.res.status), self.res.reason,
                          str(self.res.getheaders()), '\n'], logger.sep)
    utils.dwrite(msg, lout, self.lock)
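# Hypothetical sketch of utils.separate(fields, sep) as used by log()
# above: join the request/response fields with the logger's separator into
# a single record (speculative; the real helper may differ).
def separate(fields, sep):
    return sep.join(str(f) for f in fields)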