def __init__(self, fin, h1, h2, outputs, lr, C, pDropHidden1=0.2, pDropHidden2=0.5): # 超参数 self.lr = lr self.C = C self.pDropHidden1 = pDropHidden1 self.pDropHidden2 = pDropHidden2 # 所有需要优化的参数放入列表中,分别是连接权重和偏置 self.params = [] # 全连接层,需要计算卷积最后一层的神经元个数作为MLP的输入 self.params.append(layerMLPParams((fin, h1))) self.params.append(layerMLPParams((h1, h2))) self.params.append(layerMLPParams((h2, outputs))) # 定义 Theano 符号变量,并构建 Theano 表达式 self.X = T.matrix('X') self.Y = T.matrix('Y') # 训练集代价函数 YDropProb = model(self.X, self.params, pDropHidden1, pDropHidden2) self.trNeqs = basicUtils.neqs(YDropProb, self.Y) trCrossEntropy = categorical_crossentropy(YDropProb, self.Y) self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params)) # 测试验证集代价函数 YFullProb = model(self.X, self.params, 0., 0.) self.vateNeqs = basicUtils.neqs(YFullProb, self.Y) self.YPred = T.argmax(YFullProb, axis=1) vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y) self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
def information_gain(previous_y, current_y):
    # Inputs:
    #   previous_y: the distribution of original labels (0's and 1's)
    #   current_y:  the distribution of labels after splitting based on a particular
    #               split attribute and split value
    # TODO: Compute and return the information gain from partitioning the previous_y labels
    # into the current_y labels.
    # You will need to use the entropy function above to compute information gain
    # Reference: http://www.cs.cmu.edu/afs/cs.cmu.edu/academic/class/15381-s06/www/DTs.pdf
    """
    Example:
        previous_y = [0,0,0,1,1,1]
        current_y = [[0,0], [1,1,1,0]]
        info_gain = 0.45915
    """
    info_gain = 0
    y_left = current_y[0]
    y_right = current_y[1]
    info_gain = entropy(previous_y) - (
        entropy(y_left) * (len(y_left) / len(flatten(current_y)))
        + entropy(y_right) * (len(y_right) / len(flatten(current_y))))
    return info_gain
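# Quick check of the docstring example above (not part of the original snippet),
# assuming the entropy() helper the TODO refers to and Python 3 division semantics:
#   entropy([0,0,0,1,1,1]) = 1.0
#   weighted split entropy = 2/6 * entropy([0,0]) + 4/6 * entropy([1,1,1,0])
#                          = 2/6 * 0.0 + 4/6 * 0.8113 = 0.5409
#   info_gain = 1.0 - 0.5409 = 0.4591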
def node_dictionary(filename):
    global P, initPR, newPR, dictionary
    with open(filename, 'r') as f:
        for line in f:
            splitLine = line.split()
            dictionary[splitLine[0]] = splitLine[1:]
    #create a list of sinknodes
    for k in dictionary.keys():
        if k not in flatten(dictionary.values()):
            sinknodes.append(k)
    #create a list of keys of the dictionary
    for key in dictionary.keys():
        P.append(key)
    #create the initial page rank for each node
    for p in dictionary:
        initPR[p] = (float("{0:.3f}".format(float(1) / len(P))))
    #count the total outlinks for each node
    for k in dictionary.keys():
        ctr = flatten(dictionary.values()).count(k)
        outlinks[k] = ctr
    #call the pagerank function with the dictionary and other details
    calc_page_rank(dictionary, initPR, sinknodes)
def classify(query_data, data, newMat, w):
    n, m = data.shape
    axis_x = flatten(data[0].tolist())
    axis_y = flatten(data[1].tolist())
    label = data.values[:, -1]
    color = ['b', 'r', 'g', 'y', 'c']
    # scatter plot of the raw two-dimensional data
    plt.subplot(2, 1, 1)
    for i in range(n):
        plt.scatter(axis_x[i], axis_y[i], c=color[int(label[i])], marker='o', s=5)
    mean_class = data.groupby([m - 1]).mean().values
    count_class = data.groupby([m - 1]).count().values[:, 0]
    w0 = sum([x * y * w for x, y in zip(mean_class, count_class)]) / n
    for dt in query_data:
        if dt * w - w0[0, 0] > 0:
            print "class1"
        else:
            print "class2"
    plt.title('raw data')
    # after the transform the data is one-dimensional; for readability the
    # y axis uses random values in [0, 1]
    plt.subplot(2, 1, 2)
    axis_x = flatten(newMat[:, 0].tolist())
    axis_y = [random.random() for _ in range(n)]
    for j in range(n):
        plt.scatter(axis_x[j], axis_y[j], c=color[int(label[j])], marker='o', s=5)
    plt.plot([w0[0, 0]] * n, axis_y)
    plt.title('new data')
    plt.show()
def greedy_allocation(EVlist, S, endowment, t):
    #creates a multiset of reported active marginal values for active agents
    V = []
    for ev in range(len(EVlist)):
        if (EVlist[ev].arrival <= t and EVlist[ev].departure >= t):
            if (S >= EVlist[ev].consumption):
                query_cons = EVlist[ev].consumption
            else:
                query_cons = S
            V.append(EVlist[ev].marginal_values[:query_cons])
    #then updates an agents endowment
    V_help = sorted(list(flatten(V)), reverse=True)  #make list of lists, flat array
    for s in range(S):  #for every unit of electricity
        if V_help:
            max_v = max(list(flatten(V_help)))  #finds max from flat array
            for ev in range(len(EVlist)):
                if max_v in EVlist[ev].marginal_values:
                    #add endowment
                    endowment[ev] += 1
                    #remove values from agent
                    V_help.pop(0)  #in case of s>1
                    EVlist[ev].marginal_values.pop(0)
    #return endowment as journal guides
    if empty(V):
        V.append([0])
    return (endowment, flatten(V))
def returnNull(time_series):
    rank_arr = np.array(np.zeros((time_series.shape[0], time_series.shape[1])))  #copy for array dimension instantiation
    cntr = 0
    for col in time_series.T:
        rank_arr[:, cntr] = rank_expressions(col)
        cntr += 1
    signarr = np.array(np.zeros((rank_arr.shape[0], rank_arr.shape[1] - 1)))
    cntr = 0
    for row in time_series:
        signarr[cntr, :] = rank_sign_diff(row)
        cntr += 1
    phase_arrs = []
    for row in signarr:
        arr = calc_phases(row)
        phase_arrs.append(arr)
    phase_counts = []
    for arrs in phase_arrs:
        phase_lengths = calc_phase_lengths(arrs)
        phase_counts.append(phase_lengths)
    bins = set(flatten(np.arange(1, time_series.shape[1], 1)))
    frequencies = []
    for arrs in phase_counts:
        freqs = calculate_frequencies(arrs, bins)
        frequencies.append(freqs)
    popdist = calculate_frequencies(flatten(phase_counts), bins)
    popdist[popdist == 0] = 1e-100
    return popdist, bins
def write_data(_writer, _events, _labels, _masks, _durations):
    current_split_midi_num = len(_events)
    for k in range(current_split_midi_num):
        padded_events_flat = flatten(_events[k])
        padded_labels_flat = flatten(_labels[k])
        keys_mask_flat = flatten(_masks[k])
        durations_flat = flatten(_durations[k])
        # write a TFRecord example for each sampled label/event pair
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'events': tf.train.Feature(int64_list=tf.train.Int64List(
                    value=padded_events_flat)),
                'labels': tf.train.Feature(int64_list=tf.train.Int64List(
                    value=padded_labels_flat)),
                'keys_mask': tf.train.Feature(int64_list=tf.train.Int64List(
                    value=keys_mask_flat)),
                'durations': tf.train.Feature(int64_list=tf.train.Int64List(
                    value=durations_flat)),
            }))
        serialized = example.SerializeToString()
        _writer.write(serialized)
def __init__(self, fin, f1, nin1, f2, nin2, f3, nin3, expand, h1, outputs, lr, C, pDropConv=0.2, pDropHidden=0.5): # 超参数 self.lr = lr self.C = C self.pDropConv = pDropConv self.pDropHidden = pDropHidden # 所有需要优化的参数放入列表中,分别是连接权重和偏置 self.params = [] self.paramsNIN = [] self.paramsConv = [] # 卷积层,w=(本层特征图个数,上层特征图个数,卷积核行数,卷积核列数),b=(本层特征图个数) self.paramsNIN.append(layerNINParams((f1, fin, nin1, 3, 3), expand)) self.paramsNIN.append(layerNINParams((f2, f1 * expand, nin2, 3, 3), expand)) self.paramsNIN.append(layerNINParams((f3, f2 * expand, nin3, 3, 3), expand)) # 全局平均池化层 self.paramsConv.append(layerConvParams((h1, f3 * expand, 1, 1))) self.paramsConv.append(layerConvParams((outputs, h1, 1, 1))) self.params = self.paramsNIN + self.paramsConv # 定义 Theano 符号变量,并构建 Theano 表达式 self.X = T.tensor4('X') self.Y = T.matrix('Y') # 训练集代价函数 YDropProb = model(self.X, self.params, pDropConv, pDropHidden) self.trNeqs = basicUtils.neqs(YDropProb, self.Y) trCrossEntropy = categorical_crossentropy(YDropProb, self.Y) self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params)) # 测试验证集代价函数 YFullProb = model(self.X, self.params, 0., 0.) self.vateNeqs = basicUtils.neqs(YFullProb, self.Y) self.YPred = T.argmax(YFullProb, axis=1) vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y) self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
def __init__(self, fin, h1, piece1, h2, piece2, outputs, lr, C, pDropHidden1=0.2, pDropHidden2=0.5): # 超参数 self.lr = lr self.C = C self.pDropHidden1 = pDropHidden1 self.pDropHidden2 = pDropHidden2 # 所有需要优化的参数放入列表中,分别是连接权重和偏置 self.params = [] hiddens = [] pieces = [] # maxout层,指定piece表示分段线性函数的段数,即使用隐隐层的个数,维度与一般MLP相同,使用跨通道最大池化 self.params.append(layerMLPParams((fin, h1 * piece1))) hiddens.append(h1) pieces.append(piece1) self.params.append(layerMLPParams((h1, h2 * piece2))) hiddens.append(h2) pieces.append(piece2) self.params.append(layerMLPParams((h2, outputs))) # 定义 Theano 符号变量,并构建 Theano 表达式 self.X = T.matrix('X') self.Y = T.matrix('Y') # 训练集代价函数 YDropProb = model(self.X, self.params, hiddens, pieces, pDropHidden1, pDropHidden2) self.trNeqs = basicUtils.neqs(YDropProb, self.Y) trCrossEntropy = categorical_crossentropy(YDropProb, self.Y) self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params)) # 测试验证集代价函数 YFullProb = model(self.X, self.params, hiddens, pieces, 0., 0.) self.vateNeqs = basicUtils.neqs(YFullProb, self.Y) self.YPred = T.argmax(YFullProb, axis=1) vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y) self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
def maptest14bus_test_system(comp_filename, start_range, contingency_range, max_load_loss_contingency): from compiler.ast import flatten # Method Variables Initialization CMB = [] CMB_new = [] temp_valueset = [] new_valueset = [] valueset_new = [] valueset = comp_filename valueset_new = list(flatten(valueset)) iter_max_load_loss_outage = max_load_loss_contingency max_load_loss_outage = iter_max_load_loss_outage max_load_loss_outage = list(flatten(max_load_loss_outage)) # Remove the identified transmission lines for elem in range(0, len(max_load_loss_outage)): valueset_new.remove(max_load_loss_outage[elem]) new_valueset = [ valueset_new[i:i + 1] for i in range(0, len(valueset_new), 1) ] # Creating a new contingency list for i in range(0, len(new_valueset)): temp_valueset = new_valueset[i] temp_iter_max_load_loss_outage = iter_max_load_loss_outage[0] iter_temp_comb = temp_valueset + temp_iter_max_load_loss_outage CMB.append(iter_temp_comb) CMB_new.append(CMB) return CMB_new
def input_MI_data(filename1, filename2, filename3, labelname):
    temp1 = np.ones((36, 1))
    temp2 = np.ones((24, 1))
    temp = np.ones((60, 1))
    data1 = scio.loadmat(filename1)['ss']
    data2 = scio.loadmat(filename2)['ss']
    data3 = scio.loadmat(filename3)['ss']
    label = scio.loadmat(labelname)['label']
    label = label - temp
    '''
    raw data shape:   data1[20, 30720]
    final data shape: data[60, 3channel*256Hz*6s]
    '''
    data = np.zeros((60, 4608), np.float32)  #4608 = 64*72
    for i in range(20):
        data[i] = flatten(data1[:, i * 256 * 6:(i + 1) * 256 * 6].tolist())
        data[i + 20] = flatten(data2[:, i * 256 * 6:(i + 1) * 256 * 6].tolist())
        data[i + 40] = flatten(data3[:, i * 256 * 6:(i + 1) * 256 * 6].tolist())
    x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.4, random_state=0)
    print x_train.shape
    print x_test.shape
    print y_train.shape
    print y_test.shape
    print y_train
    return x_train, np.transpose(y_train), x_test, np.transpose(y_test)

#input_MI_data('sxq1_Segmentation.dat.mat','sxq2_Segmentation.dat.mat','sxq3_Segmentation.dat.mat')
def getName(node): if node is None: return '' if isinstance(node, (basestring, int, long, float)): return str(node) if isinstance(node, (ast.Class, ast.Name, ast.Function)): return node.name if isinstance(node, ast.Dict): pairs = ['%s: %s' % pair for pair in [(getName(first), getName(second)) for (first, second) in node.items]] return '{%s}' % ', '.join(pairs) if isinstance(node, ast.CallFunc): notArgs = [n for n in node.getChildNodes() if n not in node.args] return getNameTwo('%s(%s)', notArgs, node.args, rightJ=', ') if isinstance(node, ast.Const): try: float(node.value) return str(node.value) except: return repr(str(node.value)) if isinstance(node, ast.LeftShift): return getNameTwo('%s<<%s', node.left, node.right) if isinstance(node, ast.RightShift): return getNameTwo('%s>>%s', node.left, node.right) if isinstance(node, (ast.Mul, ast.Add, ast.Sub, ast.Power, ast.Div, ast.Mod)): return getNameMath(node) if isinstance(node, ast.Bitor): return '|'.join(map(getName, node.nodes)) if isinstance(node, ast.UnarySub): return '-%s' % ''.join(map(getName, ast.flatten(node))) if isinstance(node, ast.List): return '[%s]' % ', '.join(map(getName, ast.flatten(node))) if isinstance(node, ast.Tuple): return '(%s)' % ', '.join(map(getName, ast.flatten(node))) if isinstance(node, ast.Lambda): return 'lambda %s: %s' % (', '.join(map(getName, node.argnames)), getName(node.code)) if isinstance(node, ast.Getattr): return '.'.join(map(getName, ast.flatten(node))) if isinstance(node, ast.Compare): rhs = node.asList()[-1] return '%s %r' % (' '.join(map(getName, node.getChildren()[:-1])), rhs.value) if isinstance(node, ast.Slice): children = node.getChildren() slices = children[2:] formSlices = [] for sl in slices: if sl is None: formSlices.append('') else: formSlices.append(getName(sl)) sliceStr = ':'.join(formSlices) return '%s[%s]' % (getName(children[0]), sliceStr) if isinstance(node, ast.Not): return "not %s" % ''.join(map(getName, ast.flatten(node))) if isinstance(node, ast.Or): return " or ".join(map(getName, node.nodes)) if isinstance(node, ast.And): return " and ".join(map(getName, node.nodes)) if isinstance(node, ast.Keyword): return "%s=%s" % (node.name, getName(node.expr)) return repr(node)
def checkMatch(tag, tagFound): # first, see if body text is a data field if len(tag.contents) == 1 and getTagText(tag).count("%") == 1: if len(getTagText(tagFound).strip()) <= 0: #print "Returning FALSE match due to empty tag text" return False # remove data attributes and check rest with found tag to see if match tagAttrSet = { x for x in tag.attrs if type(tag[x]) is list or ( tag[x].find("%") < 0 and x.find("interleech") < 0) } #print "Tag Attribute Set:", tagAttrSet # some attributes have simple values, add them to a set tagValueSet = {tag[x] for x in tagAttrSet if type(tag[x]) is not list} #print "Tag Value Set:", tagValueSet tagFoundValueSet = { tagFound[x] for x in tagAttrSet if type(tagFound[x]) is not list } #print "Tag Found Value Set:", tagFoundValueSet # other attributes have lists of values, add them to a new list # then flatten and add to a set to be compared tagValueSet2 = set( flatten([tag[x] for x in tagAttrSet if type(tag[x]) is list])) tagFoundValueSet2 = set( flatten([tagFound[x] for x in tagAttrSet if type(tagFound[x]) is list])) if tagValueSet == tagFoundValueSet and tagValueSet2 == tagFoundValueSet2: #print "MATCH FOUND! ", tagFound.name, " with attrs: ", tagFound.attrs , " with text: ", tagFound.get_text().strip()[:10] return True return False
def evaluate_words(dictssentence,keyword,new=True): # for word !!! dictsword_tmp = {key: sentence_evaluate(value) for key, value in dictssentence.items() if len(sentence_evaluate(value)) > 0} dictsword_tmp2 = map(lambda z: z[1], dictsword_tmp.items()) corpus = map(lambda z: " ".join(z), dictsword_tmp2) wordlists = top50words(corpus) # sentence lists if(new): if keyword[0] is None: words_final = map(lambda (word, importance): word, wordlists)[:100] else: dictsword_tmp3 = list(set(flatten(dictsword_tmp2))) word_potential = map(lambda t: (t,similar_check_higher(t, keyword)), flatten(dictsword_tmp3)) words_2 = sorted(word_potential, key = lambda (word, score):score, reverse =True) words_3 = filter(lambda (key,score):score != -1,words_2) words_final = map(lambda (key, score): key, words_3)[:100] # word importance based on word2vec # ids:word index: order id score: vector word_vector_dicts = [distattr2(word, word2vec_evaluate(word)) for word in words_final if len(word2vec_evaluate(word)) != 0] try: final_list = map(lambda x: (x.ids,x.score),textrankgetter(word_vector_dicts, False)) return final_list except: return wordlists else: return wordlists
def conductance(comm, hyperedges, node_tags=None): """Compute clustering conductance measure Parameters ---------- comm: list or dict community list of community dictionary hyperedges: list of lists list of hyperedges node_tags: list of str, optional, if comm is a dictionary, specify correct order of keys list of partite labels, as found in the hyperedges Returns ------- scores: dictionary conductance scores """ if isinstance(comm, list): comm_dict, node_tags = community_array_to_community_dict( comm, hyperedges) else: if not node_tags: node_tags = partite_order(hyperedges, comm) comm_dict = comm out_edges = {} degs = {} _he_comm_list = [] for he in hyperedges: _he_comm_list = [ comm_dict[node_tags[ind]][n] for ind, n in enumerate(he) ] #nominator if any(isinstance(x, list) for x in _he_comm_list): #overlapping case _he_comm_counts = dict(Counter(flatten(_he_comm_list))) for n in _he_comm_counts: if _he_comm_counts[n] < len(comm_dict): if n not in out_edges: out_edges[n] = 1 else: out_edges[n] = 1 + out_edges[n] elif len(sp.unique(_he_comm_list)) > 1: #non overlapping case for n in sp.unique(_he_comm_list): if n not in out_edges: out_edges[n] = 1 else: out_edges[n] = 1 + out_edges[n] for n in flatten(_he_comm_list): #denominator if n not in degs: degs[n] = 1 else: degs[n] = 1 + degs[n] scores = {} for c in degs: other_deg = sp.array([v for k, v in degs.items() if k != c]).sum() scores[c] = float(out_edges[c]) / min( [float(degs[c]), float(other_deg)]) return scores
def funcgamewon():
    flattenedcompdata = flatten(compdata)
    flatteneduserdata = flatten(userdata)
    if True in flattenedcompdata or True in flatteneduserdata:
        gamewon[0] = True
        gamewon[1] = flatteneduserdata
        gamewon[2] = flattenedcompdata
    return gamewon
def dfsForTrees(solution, tree_pos, row, lizards): size = len(solution) all_positions = generateLizardPositionsForTreesDfs(solution, tree_pos, row) stack = [] stack += all_positions position = None if not stack: if getHeuristicCostForTrees(makeBoard(solution, tree_pos, 0))==0 and lizards == sum(x is not None for x in flatten(solution)): return True if row > len(board)-2: return False stack = generateLizardPositionsForTreesDfs(solution, tree_pos, row+1) row = row + 1 while stack: position = stack.pop() if type(position) is int: position = [row, position] else: position = [row, list(position)] if(isValidSolutionForTrees(solution, position[0], position[1], lizards, size)): solution[row] = position[1] #To keep the last location on board when returned otherwise no last location on board return True if(isValidPositionForTrees(solution, tree_pos, position[0], position[1])): if(row > len(board)-2): #To keep the dfs from going beyond the rows of the board and allow it to backtrack to another solution solution[position[0]] = position[1] #To remove the last location in placed of lizard return False solution[position[0]] = position[1] rem_placed = lizards - sum(x is not None for x in flatten(solution)) if(maxLizards(makeBoard(solution, tree_pos, row+1)) >= rem_placed): explored = dfsForTrees(solution, tree_pos, row+1, lizards) if explored: return True else: solution[position[0]] = None else: solution[position[0]] = None return False if(time.time()-start_time > 290): return False rem_placed = lizards - sum(x is not None for x in flatten(solution)) if(maxLizards(makeBoard(solution, tree_pos, row+1)) >= rem_placed): #To skip rows with no possible placement in order to try for other rows or else dfs will fail if(row > len(board)-2): #To keep the dfs from going beyond the rows of the board and allow it to backtrack to another solution if type(position) is None: return False solution[position[0]] = None #To remove the last location in placed of lizard return False explored = dfsForTrees(solution, tree_pos, row+1, lizards) if explored: return True else: if position is not None: solution[position[0]] = None return False # board[position[0]][position[1]] = 0 return False
def buildData(textFile, sYr, labelFile): # Make array out of text data path = _get_data('../../data', 'analysis') to_read = os.path.join(path, textFile) texts = loadJSON(to_read) tYr = [] for slice in texts: tYr.extend([str(sYr)] * len(slice)) sYr += 1 texData = np.empty([len(tYr), 3], dtype=np.object) texData[:, 0] = [dlPull(texts, 'nameClean')[ii] + '_' + str(tYr[ii]) for ii in range(0, len(tYr))] texData[:, 1] = [' '.join(x) for x in dlPull(texts, 'dataClean')] texData[:, 2] = [len(x) for x in dlPull(texts, 'dataClean')] # Make array out of labeled data regime_path = _get_data('../../data', 'regimeData') labelFile = os.path.join(regime_path, labelFile) labList = [] with open(labelFile, 'rU') as d: next(d) reader = csv.reader(d) for row in reader: cntryYr = row[1].lower() + '_' + str(row[2]) labels = [row[col] for col in range(3, len(row))] labList.append(flatten([cntryYr, labels])) labData = np.array(labList) # Find intersections and differences inBoth = list(set(texData[:, 0]) & set(labData[:, 0])) niLab = list(set(texData[:, 0]) - set(labData[:, 0])) niTex = list(set(labData[:, 0]) - set(texData[:, 0])) ###### #This is where modifications have to happen depending on whether you're #doing in-sample or true out-of-sample work. This is a difficult way to #do this, but changing it requires changing the entirety of how the system #works, from scrape to analysis. ###### c = np.unique([x.split('_')[0] for x in inBoth]) d = flatten([['{}_{}'.format(country, year) for country in c] for year in tYr]) tMatches = flatten( [[i for i, x in enumerate(texData[:, 0]) if x == cyr] for cyr in d]) # tMatches = flatten( # [[i for i, x in enumerate(texData[:, 0]) if x == cyr] for cyr in inBoth]) # lMatches = flatten( # [[i for i, x in enumerate(labData[:, 0]) if x == cyr] for cyr in inBoth]) tlData = texData[tMatches, ] # tlData = np.hstack( # (texData[tMatches, ], # labData[lMatches, 1:labData.shape[1]])) return tlData
def getName(node): if node is None: return '' if isinstance(node, (basestring, int, long, float)): return str(node) if isinstance(node, (ast.Class, ast.Name, ast.Function)): return node.name if isinstance(node, ast.Dict): pairs = [ '%s: %s' % pair for pair in [(getName(first), getName(second)) for (first, second) in node.items] ] return '{%s}' % ', '.join(pairs) if isinstance(node, ast.CallFunc): notArgs = [n for n in node.getChildNodes() if n not in node.args] return getNameTwo('%s(%s)', notArgs, node.args, rightJ=', ') if isinstance(node, ast.Const): try: float(node.value) return str(node.value) except: return repr(str(node.value)) if isinstance(node, ast.LeftShift): return getNameTwo('%s<<%s', node.left, node.right) if isinstance(node, ast.RightShift): return getNameTwo('%s>>%s', node.left, node.right) if isinstance(node, (ast.Mul, ast.Add, ast.Sub, ast.Power, ast.Div, ast.Mod)): return getNameMath(node) if isinstance(node, ast.Bitor): return '|'.join(map(getName, node.nodes)) if isinstance(node, ast.UnarySub): return '-%s' % ''.join(map(getName, ast.flatten(node))) if isinstance(node, ast.List): return '[%s]' % ', '.join(map(getName, ast.flatten(node))) if isinstance(node, ast.Tuple): return '(%s)' % ', '.join(map(getName, ast.flatten(node))) if isinstance(node, ast.Lambda): return 'lambda %s: %s' % (', '.join(map( getName, node.argnames)), getName(node.code)) if isinstance(node, ast.Getattr): return '.'.join(map(getName, ast.flatten(node))) if isinstance(node, ast.Compare): rhs = node.asList()[-1] return '%s %r' % (' '.join(map(getName, node.getChildren()[:-1])), rhs.value) if isinstance(node, ast.Slice): children = node.getChildren() return '%s[%s%s]' % (getName(children[0]), ':', children[-1].value) if isinstance(node, ast.Not): return "not %s" % ''.join(map(getName, ast.flatten(node))) if isinstance(node, ast.Or): return " or ".join(map(getName, node.nodes)) if isinstance(node, ast.And): return " and ".join(map(getName, node.nodes)) if isinstance(node, ast.Keyword): return "%s=%s" % (node.name, getName(node.expr)) return repr(node)
def draw_matrix(figure_number):
    order_w = [[abs(w[m_row][m_col]) for m_row in flatten(row_indices)]
               for m_col in flatten(col_indices)]
    plt.figure(figure_number)
    plt.clf()
    plt.title("Weight matrix sparsification")
    plt.imshow(order_w)
    plt.colorbar(orientation='horizontal')
    plt.pause(0.001)
def flatten(seq, container=None):
    if container is None:
        container = []
    for s in seq:
        if hasattr(s, '__iter__'):
            flatten(s, container)
        else:
            container.append(s)
    return container
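# A quick sanity check of the flatten() helper above (not part of the original
# snippet); the expected output is shown in the comment.
print(flatten([1, [2, [3, 4]], (5,)]))  # [1, 2, 3, 4, 5]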
def page_rank(matrix, nodes, beta=0.85, epsilon=None, period=None): #input parameters-for testing #matrix=[1./2,1./2,0,1./2,0,0,0,1./2,1] #beta=0.8 #nodes=3 #epsilon=0.0001 t = 0 #comprehensions teleport_matrix = [] teleport_matrix = [((1 - beta) * 1.0 / nodes) for i in (range(nodes) * nodes)] flatten(matrix) matrix = [(float(i) * beta) for i in matrix] alpha = [(i + j) for i, j in zip(matrix, teleport_matrix)] alpha_array = numpy.matrix(alpha) alpha_array = alpha_array.reshape(nodes, nodes) ###################### page_ranks = defaultdict(list) gen = [i for i in range(nodes)] for i in gen: page_ranks[i].append(0.) page_ranks[i].append(1. / nodes) t = 0 def vectorize(page_ranks): vector = [] for i in page_ranks: vector.append(page_ranks[i][-1]) c = numpy.matrix(vector).transpose() return c if epsilon is None and period is not None: while t < period: current_page_ranks = alpha_array * vectorize(page_ranks) for i, n in enumerate(current_page_ranks): page_ranks[i].append(float(n)) t += 1 elif epsilon is not None and period is None: while float(abs(page_ranks[i][-1] - page_ranks[i][-2])) > epsilon: current_page_ranks = alpha_array * vectorize(page_ranks) for i, n in enumerate(current_page_ranks): page_ranks[i].append(float(n)) t += 1 else: print "Please enter either an Epsilon or Period parameter. Both cannot be empty and both cannot be given!" for i in page_ranks: print "The Page Ranks Are:", i, " ", page_ranks[i][-1] print "It took %d iterations" % (t)
def mdoc_compile_unit((cu, die)):
    notOrdered = [child for child in die.iter_children()]
    # all subprogram which is a kind of function should be placed at bottom
    children = flatten(splitBy(isTagNotSubprogram, notOrdered))
    # [M doc]
    definitions = [mdoc((cu, child)) for child in children]
    # (a -> b) -> F a -> F b
    return fmap(lambda xs: flatten(intersperse(xs, P.newline())), sequence(definitions))
def func(label):
    if label == 'tminplot':
        tmin_plot.set_visible(not tmin_plot.get_visible())
        for i in flatten(tmin_error):
            if i:
                i.set_visible(not i.get_visible())
    elif label == 'emass':
        for i in flatten(emass_plot):
            if i:
                i.set_visible(not i.get_visible())
    plt.draw()
def get_consts(clause):
    words_pattern = "[A-Za-z0-9_]+"
    functs_pattern = "(\w+)\s*\("
    if isinstance(clause, list):
        words = flatten([find(x, words_pattern) for x in clause])
        functors = flatten([find(x, functs_pattern) for x in clause])
    if isinstance(clause, basestring):
        words = find(clause, words_pattern)
        functors = find(clause, functs_pattern)
    consts = [x for x in words if not x in functors]
    return tuple(consts)
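# Illustrative use of get_consts() (not from the original code). The snippet's
# find() helper is not shown here; the stand-in below assumes it is a thin
# wrapper around re.findall, which matches how it is called above.
import re

def find(text, pattern):
    # hypothetical stand-in for the snippet's find() helper
    return re.findall(pattern, text)

print(get_consts("parent(tom, bob)"))  # ('tom', 'bob'); 'parent' is dropped as a functor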
def longest_common_string(str1, str2):
    break_strings = lambda __str: flatten([[e[0:i + 1] for i in range(0, len(e))] for e in __str.split(" ")])
    common_strings = len(list(set.intersection(set(break_strings(str1)), set(break_strings(str2)))))
    min_length = min(sum([len(e) for e in str1.split(" ")]), sum([len(e) for e in str2.split(" ")]))
    max_length = max(sum([len(e) for e in str1.split(" ")]), sum([len(e) for e in str2.split(" ")]))
    if min_length == min([len(e) for e in flatten([str1.split(" "), str2.split(" ")])]):
        return float(common_strings) / max_length
    return float(common_strings) / min_length
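# Worked example for longest_common_string() (not from the original code); it
# relies on the flatten() helper defined elsewhere in this file. The shared word
# prefixes of "hello" and "help" are {h, he, hel}, so the score is 3 / 5.
print(longest_common_string("hello", "help"))  # 0.6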
def __init__(self, sent_sentiment_nps, sentences, __eatery_name): if __eatery_name: self.list_to_exclude = flatten([ "food", "service", "cost", "ambience", "place", "Place", "i", "great", "good", __eatery_name.lower().split(), "rs", "delhi", "india", "indian" ]) #self.list_to_exclude = ["food", "service", "cost", "ambience", "delhi", "Delhi", # "place", "Place", __eatery_name.lower().split()] else: self.list_to_exclude = [ "food", "i", "service", "cost", "ambience", "delhi", "Delhi", "place", "Place", "india", "indian" ] self.sentences = sentences self.sent_sentiment_nps = sent_sentiment_nps self.merged_sent_sentiment_nps = self.merge_similar_elements() print self.sentences[0:2], print self.sent_sentiment_nps[0:2] assert(set(Counter(self.merged_sent_sentiment_nps.keys()).values()) == {1}),\ "merge_similar_elements method has an error as all the keys are not unique" new_list = list() __sorted = sorted(self.merged_sent_sentiment_nps.keys()) self.list_to_exclude = flatten(self.list_to_exclude) #self.NERs = self.ner() self.keys = self.merged_sent_sentiment_nps.keys() self.clusters = list() self.result = list() self.filter_clusters() #The noun phrases who were not at all in the self.clusters self.without_clusters = set.difference(set(range(0, len(self.keys))), set(flatten(self.clusters))) self.populate_result() self.common_ners = list( set.intersection(set([e[0] for e in self.ner()]), set([e[0] for e in self.custom_ner()]))) self.result = self.filter_on_basis_pos_tag() self.result = sorted(self.result, reverse=True, key=lambda x: x.get("positive") + x.get( "negative") + x.get("neutral"))
def RunImportedModel(order, cog_p_r, inh_p_r, input_X):
    shape_v = input_X.shape
    n = shape_v[0]
    cog_p_r_l = flatten(cog_p_r.tolist())
    inh_p_r_l = flatten(inh_p_r.tolist())
    inp_l = flatten(input_X[0].tolist())
    rans = ao.get_result(cp.n_id, order, inp_l, inh_p_r_l, cog_p_r_l)
    for i in range(n):
        if i == 0:
            continue
        inp_l = flatten(input_X[i].tolist())
        tans = ao.get_result(cp.n_id, order, inp_l, inh_p_r_l, cog_p_r_l)
        rans = np.row_stack((rans, tans))
    return np.mat(rans)
def __init__(self, fin, f1, piece1, f2, piece2, f3, piece3, h1, pieceh1, h2, pieceh2, outputs, lr, C, pDropConv=0.2, pDropHidden=0.5): # 超参数 self.lr = lr self.C = C self.pDropConv = pDropConv self.pDropHidden = pDropHidden # 所有需要优化的参数放入列表中,分别是连接权重和偏置 self.params = [] self.paramsCNN = [] self.paramsMLP = [] mapunits = [] pieces = [] # 卷积层,w=(本层特征图个数,上层特征图个数,卷积核行数,卷积核列数),b=(本层特征图个数) self.paramsCNN.append(layerCNNParams((f1 * piece1, fin, 3, 3))) # conv: (32, 32) pool: (16, 16) mapunits.append(f1) pieces.append(piece1) self.paramsCNN.append(layerCNNParams((f2 * piece2, f1, 3, 3))) # conv: (16, 16) pool: (8, 8) mapunits.append(f2) pieces.append(piece2) self.paramsCNN.append(layerCNNParams((f3 * piece3, f2, 3, 3))) # conv: (8, 8) pool: (4, 4) mapunits.append(f3) pieces.append(piece3) # 全连接层,需要计算卷积最后一层的神经元个数作为MLP的输入 self.paramsMLP.append(layerMLPParams((f3 * 4 * 4, h1 * pieceh1))) mapunits.append(h1) pieces.append(pieceh1) self.paramsMLP.append(layerMLPParams((h1, h2 * pieceh2))) mapunits.append(h2) pieces.append(pieceh2) self.paramsMLP.append(layerMLPParams((h2, outputs))) self.params = self.paramsCNN + self.paramsMLP # 定义 Theano 符号变量,并构建 Theano 表达式 self.X = T.tensor4('X') self.Y = T.matrix('Y') # 训练集代价函数 YDropProb = model(self.X, self.params, mapunits, pieces, pDropConv, pDropHidden) self.trNeqs = basicUtils.neqs(YDropProb, self.Y) trCrossEntropy = categorical_crossentropy(YDropProb, self.Y) self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params)) # 测试验证集代价函数 YFullProb = model(self.X, self.params, mapunits, pieces, 0., 0.) self.vateNeqs = basicUtils.neqs(YFullProb, self.Y) self.YPred = T.argmax(YFullProb, axis=1) vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y) self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
def visit_BY(self, node, visited_children):
    children = flatten(visited_children)
    if isinstance(children[0], Gensuha):
        value = children  # ybu = Y + BU
    else:
        value = Cmavo("".join(children), node.expr_name)
    return value
def getVpt(v, k):
    v = abs(v)
    vList = v.tolist()
    vList = flatten(vList)
    vList.sort()
    k = int(k * len(vList))
    return vList[k]
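# Example of getVpt() (not from the original code), assuming numpy is imported
# as np and the flatten() helper from this file is available. It returns the
# value at the k-th quantile position of the sorted absolute values: for k=0.5
# below, index int(0.5 * 4) = 2 of [1, 2, 3, 5] is returned.
print(getVpt(np.array([[3, -1], [2, 5]]), 0.5))  # 3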
def get_index_pages_links_list(self, base_url):
    logging.info("start page:%s" % base_url)
    pages_links_list = []
    try:
        f = urllib2.urlopen(base_url)
        web_str = f.read()
        soup = BeautifulSoup(web_str, from_encoding="unicode")
        urls_bs4_tage_list = soup.find("div", "page").find_all("a")
        urls_tag_str_list = map(lambda url_bs4_tag: str(url_bs4_tag), urls_bs4_tage_list)
        logging.info("urls_tag_str_list:%s" % urls_tag_str_list)
        logging.info("type(urls_tag_str_list):%s" % type(urls_tag_str_list))
        logging.info("urls_tag_str_list[0]:%s" % urls_tag_str_list[0])
        logging.info("type(urls_tag_str_list[0]):%s" % type(urls_tag_str_list[0]))
        raw_urls_str_2d_list = map(
            lambda url_tag_str: re.findall(r'href="(.*)" title', url_tag_str),
            urls_tag_str_list)
        pages_links_list = list(set(flatten(raw_urls_str_2d_list)))
        pages_links_list.append(base_url)
    except Exception as e:
        logging.error(e)
    return pages_links_list
def merge_and_write_output(pos_dict,seq_strandness_dict,seq_QNAME_dict,seq_RNAME_dict,outpath): """ function :write output input: four dict (pos_dict,seq_strandness_dict,seq_QNAME_dict,seq_RNAME_dict) outpath """ global merge_read f=open(outpath,"w") for seq in seq_strandness_dict: strandness=seq_strandness_dict[seq] f.write("\t".join([seq_QNAME_dict[seq],strandness])) f.write("\n") f.write("position in hg19\tRNA sequence") f.write("\n") l_big_read=flatten([pos_dict[RNAME] for RNAME in seq_RNAME_dict[seq] if RNAME in pos_dict]) l_big_read=list(set(l_big_read)) l_big_read=[x for x in l_big_read if return_chr_len(x)<500] print l_big_read l_merge_read=merge_read(l_big_read) sequence_for_test="NA" for pos in l_merge_read: if strandness=="+": sequence_for_test=T2U(get_seq(pos)) elif strandness=="-": sequence_for_test=T2U(rc(get_seq(pos))) f.write("\t".join([pos,sequence_for_test])) f.write("\n") f.close()
def naive_most_used_word(raw_company_names):
    max_words = round(np.mean(map(lambda company_name: len(company_name.split()), raw_company_names)))
    words = flatten(map(lambda company_name: company_name.split(), raw_company_names))
    counter = collections.Counter(words)
    first_max_words_names = (itertools.islice(counter.most_common(), 0, max_words))
    unified_name = " ".join(["".join(name[0]) for name in first_max_words_names])
    return create_unified_map(raw_company_names, unified_name)
def get_pulled_images(self, docker_host):
    get_images_url = self.__get_vm_url(docker_host) + "/images/json?all=0"
    current_images_info = json.loads(requests.get(get_images_url).content)  # [{},{},{}]
    current_images_tags = map(lambda x: x['RepoTags'], current_images_info)  # [[],[],[]]
    return flatten(current_images_tags)  # [ image:tag, image:tag ]
def composeFeatures(self):
    alldata = []
    labeling = []
    unpr = len(distances)
    print "complete data length", unpr
    for i in range(unpr):
        di = distances[i]
        vfi = fingers[i]
        vhi = hand[i]
        labeli = labels[i]
        assert self.equal_ts(map(self.get_ts, [vfi, vhi, labeli]))
        labeli = self.get_feature(labeli)
        labeli = map(int, labeli)
        features = map(self.get_feature, [di, vfi, vhi])
        if not (True in map(self.empty, features)) and \
           not (self.contains_class(0, labeli)) and \
           not (self.contains_class(15, labeli)):
            features = flatten(features)
            labeli = self.labelFingers(labeli)
            labeling += [labeli]
            alldata += [map(float, features)]
    X = np.array(alldata)
    Y = np.array(labeling)
    return X, Y
def get_vars(clause):
    var_pattern = "\W[A-Z][A-Za-z0-9_]+|\W[A-Z]"
    if isinstance(clause, list):
        _vars = set(flatten([find(x, var_pattern) for x in clause]))
    if isinstance(clause, basestring):
        _vars = set(find(clause, var_pattern))
    return tuple(_vars)
def fts_async_product(self, fts2):
    assert type(fts2) == FTS
    result = FTS(set([]), set([]))
    nodes_prod = lambda u, v: tuple((list(u) if self.size > 1 else [u]) + (list(v) if fts2.size > 1 else [v]))
    labels_prod = lambda u, v: flatten((u, v))
    labels = nx.get_node_attributes(self, 'label')
    labels_fts2 = nx.get_node_attributes(fts2, 'label')
    for u in self.nodes():
        for v in fts2.nodes():
            result.add_node(nodes_prod(u, v), label=labels_prod(labels[u], labels_fts2[v]), weight=1.0)
    for (u, v) in self.edges():
        for (x, y) in fts2.edges():
            result.add_edge(nodes_prod(u, x), nodes_prod(u, y), weight=1.0)
            result.add_edge(nodes_prod(u, x), nodes_prod(v, x), weight=1.0)
            result.add_edge(nodes_prod(u, x), nodes_prod(v, y), weight=1.0)
    for u in self.graph['initial']:
        for v in fts2.graph['initial']:
            result.graph['initial'].add(nodes_prod(u, v))
    for u in self.graph['symbols']:
        for v in fts2.graph['symbols']:
            result.graph['symbols'].add(nodes_prod(u, v))
    result.size = self.size + fts2.size
    return copy.deepcopy(result)
def constructFeatureVector(self, featureDictObjectA, featureDictObjectB, selectedFeatures):
    assert ("Global<Maximum >" not in selectedFeatures)
    assert ("Global<Minimum >" not in selectedFeatures)
    assert ("Histrogram" not in selectedFeatures)
    assert ("Polygon" not in selectedFeatures)
    features = []
    for key in selectedFeatures:
        if key == 'RegionCenter':
            continue
        else:
            if not isinstance(featureDictObjectA[key], np.ndarray) or featureDictObjectA[key].size == 1:
                features.append(float(featureDictObjectA[key]) - float(featureDictObjectB[key]))
            else:
                features.extend(flatten((featureDictObjectA[key].astype('float32')
                                         - featureDictObjectB[key].astype('float32')).tolist()))
    # there should be no nans or infs
    assert (np.all(np.isfinite(np.array(features))))
    return features
def generate_dict(df, columns):
    def convert_row(x):
        ret = []
        for feat_name, feat_values in x.asDict().items():
            if not feat_values:
                continue
            for feat_value in str(feat_values).split(','):
                ret.append((feat_name, feat_value))
        return ret

    ## operate DataFrame by rdd
    # select columns
    df = df[columns]
    # convert each Row to tuples, and flatten them
    rdd = df.rdd.flatMap(convert_row)
    # count frequencies
    rdd = rdd.map(lambda x: (x, 1)).reduceByKey(lambda x, y: x + y)
    #rdd = rdd.map(lambda x: '\t'.join([str(e) for e in flatten(x)]))
    rdd = rdd.map(lambda x: flatten(x))
    print("dict rdd:", rdd)
    input_schema = StructType()
    input_schema.add(StructField("feature", StringType(), True))
    input_schema.add(StructField("value", StringType(), True))
    input_schema.add(StructField("count", StringType(), True))
    df = sqlContext.createDataFrame(rdd, input_schema)
    return df
def find_axes2(nodeSet, candidates):
    """
    Returns all possible coherent axes
    :param nodeSet: Current node ([ballot_set], upper_bound)
    :param candidates: List of candidates
    :return: List of possible axes, False if none found
    """
    L = []
    ballots = transform_ballots(nodeSet)
    # Group the ballots that contain candidate c
    for c in candidates:
        S = Set([])
        for ballot in ballots:
            if c in ballot:
                S += Set([ballot])
        S += Set([Set([c])])
        L += Set([S])
    # Turn the list of Sets into a PQ-tree and align it
    axes = P(L)
    for ballot in ballots:
        try:
            axes.set_contiguous(ballot)
        except:
            return False, 0
    # Derive the axes from the alignments that were found
    all_axes = []  # list of axes
    for axis in axes.orderings():
        A = []
        for ballot_set in flatten(axis):
            A += [L.index(ballot_set) + 1]
        all_axes += [A]
    axes_filtered = filter_symmetric_axes(all_axes)
    return axes_filtered, axes.cardinality()
def send_frequency_reminder(self): # We exclude irrelevant frequencies. frequencies = [f for f in UPDATE_FREQUENCIES.keys() if f not in ('unknown', 'realtime', 'punctual')] now = datetime.now() reminded_orgs = {} reminded_people = [] allowed_delay = current_app.config['DELAY_BEFORE_REMINDER_NOTIFICATION'] for org in Organization.objects.visible(): outdated_datasets = [] for dataset in Dataset.objects.filter( frequency__in=frequencies, organization=org).visible(): if dataset.next_update + timedelta(days=allowed_delay) < now: dataset.outdated = now - dataset.next_update dataset.frequency_str = UPDATE_FREQUENCIES[dataset.frequency] outdated_datasets.append(dataset) if outdated_datasets: reminded_orgs[org] = outdated_datasets for reminded_org, datasets in reminded_orgs.iteritems(): print(u'{org.name} will be emailed for {datasets_nb} datasets'.format( org=reminded_org, datasets_nb=len(datasets))) recipients = [m.user for m in reminded_org.members] reminded_people.append(recipients) subject = _('You need to update some frequency-based datasets') mail.send(subject, recipients, 'frequency_reminder', org=reminded_org, datasets=datasets) print('{nb_orgs} orgs concerned'.format(nb_orgs=len(reminded_orgs))) reminded_people = flatten(reminded_people) print('{nb_emails} people contacted ({nb_emails_twice} twice)'.format( nb_emails=len(reminded_people), nb_emails_twice=len(reminded_people) - len(Set(reminded_people)))) print('Done')
def split_list(filename):
    l = []
    flatlist = []
    fileold = open(path + filename).readlines()
    fileold.pop(0)
    fileold.pop(len(fileold) - 1)
    #split date, time, id
    for line in fileold:
        #filter lines not starting with date
        if re.match(r"^\d{2}.\d{2}.\d{4}\s\d{2}:\d{2}:\d{2}:", line):
            l.append(line.split(' ', 3))
        else:
            if ("has been installed" in line):
                notify.append(line)
            #if lines not starting with date, append to other list to
            #print these messages at the end of the new file
            other_messages.append(line)
    #split time
    for entry in l:
        entry[1] = entry[1].split(':', 3)
    #flatten time sublist
    for i in l:
        flatlist.append(flatten(i))
    #os.remove(path + filename)
    return flatlist
def process_data_count(file_list, path, l):
    data = []
    for file in file_list:
        print('Processing data: ', file)
        data_temp = []
        file_object = open(path + file + '.txt')
        file_name = file_object.readlines()
        file_name = [s.strip('\r\n') for s in file_name]
        del file_name[0]
        # Split the CDR3s with spaces
        for fileline in file_name:
            s = fileline.split(',')
            s = (s[0] + ' ') * int(s[1])
            s = s.split(' ')
            data_temp.append(s[:-1])
        data_temp = flatten(data_temp)
        # Randomly select l CDR3s
        if l != 'all':
            random.shuffle(data_temp, random.random)
            data_temp = data_temp[:l]
        data.append(' '.join(chain(data_temp)))
    return data
def pHash(imgfile):
    # load the image and resize it to a 32x32 grayscale image
    img = cv2.imread(imgfile, 0)
    img = cv2.resize(img, (32, 32), interpolation=cv2.INTER_CUBIC)
    # create a 2-D array and fill it with the pixel data
    h, w = img.shape[:2]
    vis0 = np.zeros((h, w), np.float32)
    vis0[:h, :w] = img
    # 2-D DCT transform
    vis1 = cv2.dct(cv2.dct(vis0))
    # keep the top-left 8x8 block
    vis1 = vis1[0:8, 0:8]
    # turn the 2-D list into a 1-D list
    img_list = flatten(vis1.tolist())
    # compute the mean and threshold each coefficient against it
    avg = sum(img_list) * 1. / len(img_list)
    avg_list = ['0' if i < avg else '1' for i in img_list]
    # build the hash value
    return ''.join(['%x' % int(''.join(avg_list[x:x + 4]), 2) for x in range(0, 8 * 8, 4)])
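# A common way to compare two of these 16-hex-digit perceptual hashes is the
# Hamming distance over their bits; small distances indicate visually similar
# images. This helper is a sketch, not part of the original snippet.
def hamming_distance(hash1, hash2):
    # count differing bits between two equal-length hex strings
    assert len(hash1) == len(hash2)
    return sum(bin(int(a, 16) ^ int(b, 16)).count('1') for a, b in zip(hash1, hash2))

# e.g. hamming_distance(pHash('a.jpg'), pHash('b.jpg')) <= 5 is a typical "similar" threshold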
def email_prob(path_to_text_file, ham_probs, spam_probs): prob_list = [] #list of probabilities that will be factored doc_list = [] #will be used to create list of words in doc with open(str(path_to_text_file)) as current_file: #open text file qlist = [item.split() for item in current_file] #break into lists qlist = flatten(qlist) # correct for newline qlist = [re.sub(r'[^A-Za-z0-9]+', '', x) for x in qlist] #punctuation qlist = filter(None, qlist) #remove blank space entry for word in qlist: #iterate through words in list if word.lower() not in doc_list: #check if word is already in file doc_list.append(word.lower()) #add word to file's list of words #now doc_list is a list of words in the document #establish our key values for bayes calculation prob_ham = float(len(ham_probs)/float((len(ham_probs) + len(spam_probs)))) prob_spam = 1-prob_ham spam_bayes = [] for word in doc_list: #iterate through terms in our email prob_word_in_spam = 0 for k,v in spam_probs.iteritems(): #iterate through our spam probs if k == word and word != 'subject': prob_word_in_spam += float(spam_probs[k]) #assign values for k,v in ham_probs.iteritems(): #iterate through our ham probs if k == word and word != 'subject': prob_word_in_ham = float(ham_probs[k]) #assign values if prob_word_in_spam > 0.0: prob_word = float((prob_word_in_spam * prob_spam) + (prob_word_in_ham * prob_ham)) prob_spam_given_word = float((prob_word_in_spam * prob_spam) / (prob_word)) spam_bayes.append(float((prob_spam_given_word * math.log(prob_word/(1-prob_word))) + math.log(1-prob_word))) log_prob_email_is_spam = 0 for val in spam_bayes: log_prob_email_is_spam += val return math.exp(log_prob_email_is_spam)
def append_data(src, dest, classes):
    """
    read class c from src, append the result to dest
    :param src: url
    :param dest: url
    :param classes: classes, a list
    :return:
    """
    for c in classes:
        if c not in EMOTION_CLASS.keys():
            raise ValueError("%s is not a supported class" % c)
    src_tree = None
    dest_tree = None
    try:
        src_tree = ET.parse(src)
        dest_tree = ET.parse(dest)
    except IOError:
        print "cannot parse file"
        exit(-1)
    if src_tree and dest_tree:
        src_root = src_tree.getroot()
        dest_root = dest_tree.getroot()
        l = [src_root.findall("weibo[@emotion-type='%s']" % c) for c in classes]
        l = flatten(l)
        random.shuffle(l)
        [dest_root.append(l1) for l1 in l]
        # write to file
        dest_tree.write(dest, encoding="utf-8")
    print "append data is done."
def __init__(self, chords, title='generated by walkingbass.py', author='Paul Chambers'):
    self.author = author
    self.title = title
    self.chords = chords
    self.chordsd = []
    self.i.name = "Double Bass"
    self.i.clef = "bass"
    self.i.set_range((Note('C-0'), Note('F-6')))
    self.bassline = self._realbook(self.chords)
    self.bassline = flatten(self.bassline)
    longest_common = longest_duplicate_substring(self.bassline)
    print longest_common  # TODO: IMPLEMENT THIS
    return self._naive()
    self._flow()
    self.track = self._create_track()
    self.track.add_chords(self.chords)
    self.to_png()
    self.to_midi()
    self.to_mp3()
def getIdenticalFiles(): fileList=[] AllFilesList=[] for dirname,dirnames,filenames in os.walk('./'): for file in filenames: filePath = os.path.join(dirname, file) AllFilesList.append(filePath) finalFileList=flatten(AllFilesList) identicalFileList=[] for i in range(0,len(finalFileList)): x=finalFileList.__getitem__(i) for j in range(i,len(finalFileList)): result="" y=finalFileList.__getitem__(j) if x!=y: str="diff"+" "+x+" "+y result=commands.getoutput(str) if result=="" or result is None: filetuple=(x,y) identicalFileList.append(filetuple) j+=1 i+=1 if len(identicalFileList)!=0: print "List of identical files in the current directory:\n",identicalFileList else: print "There are no identical files in the current directory" print "\n\n"
def generic_visit(self, node, visited_children):
    if node.expr_name and is_selmaho_expression(node.expr_name):
        return Cmavo(lerpoi(visited_children), node.expr_name)
    elif node.node_type() in (LITERAL, REGEX):
        return node.text
    else:
        return flatten(visited_children)
def regular_jobdesc(self): if self.jdstr: jobDesc = self.jdstr.find("div", "job_request").find_all("p") #没找到p标签的情况下 if not jobDesc: if self.jdstr.find("div", "job_request").find("ul"): jobDesc = self.jdstr.find("div", "job_request").get_text().split("\n") #找到p标签的情况下 else: jobDesc = [i.get_text().strip() if i.get_text().find("\n")==-1 and i.get_text().strip() else i.get_text().split("\n") for i in jobDesc ] jobDesc = flatten(jobDesc) self.jdJob[self.jobName]["jobDesc"] = u'\n'.join(jobDesc) if len(jobDesc)==1: jobDesc = jobDesc[0] jobDesc = '\n'.join(filter(None,re.split(u";|:|。| ", jobDesc))) if type(jobDesc).__name__=="unicode": jobDesc = jobDesc.split('\n') ''' 逐行去找信息 self.flag是标志位,主要处理下述情况: 工作地址: 广东省深圳市XX路 self.extra_info有四个参数: 第一个line是一行文本(不带标签信息) 第二个idx是该行文本所在的行号 第三个add_desc是是否将该行文本加入职位描述信息 第四个clean_major为是否将之前得到的专业与技能信息进行清空 ''' self.flag = None for idx,line in enumerate(jobDesc): # print line+"Z"*50 self.extra_info(line,idx,add_desc=False)
def stringify_affiliation_rec(node):
    """
    Flatten and join list to string
    ref: http://stackoverflow.com/questions/2158395/flatten-an-irregular-list-of-lists-in-python
    """
    parts = recur_children(node)
    return " ".join(flatten(parts)).strip()
def main(self): """docstring for main""" # fname = '/home/gold/Documents/ICS/Home.ics' args = parse_args() fname = args.calendar_file self.output_format = args.output_format self.display_header = args.display_header self.start = args.date self.start = utc.localize(self.start) self.stop = args.date + datetime.timedelta(args.num_days) self.stop = utc.localize(self.stop) cal = icalendar.Calendar.from_ical(open(fname, 'rb').read()) events = [] for event in cal.walk('vevent'): new_event = self.process_event(event) if new_event: events.append(new_event) events = flatten(events) events = sorted(events, cmp=self.date_compare) count = 0 if self.output_format == 'tsv' and self.display_header: print "current\tdtstart\tdtend\tduration\tsummary\tlocation\t" \ "status\tpriority\torganizer\tall day\trecurring\tcreated\t" \ "attendees" for x in events: count += 1 if args.num_records != 0 and count > args.num_records: break self.output_record(x)