Example #1
    def __init__(self, fin, h1, h2, outputs,
                 lr, C, pDropHidden1=0.2, pDropHidden2=0.5):
        # Hyperparameters
        self.lr = lr
        self.C = C
        self.pDropHidden1 = pDropHidden1
        self.pDropHidden2 = pDropHidden2
        # Collect all parameters to be optimized (connection weights and biases) in one list
        self.params = []
        # Fully connected layers; the neuron count of the last convolutional layer is needed as the MLP input size
        self.params.append(layerMLPParams((fin, h1)))
        self.params.append(layerMLPParams((h1, h2)))
        self.params.append(layerMLPParams((h2, outputs)))

        # Define Theano symbolic variables and build the Theano expressions
        self.X = T.matrix('X')
        self.Y = T.matrix('Y')
        # Training-set cost function
        YDropProb = model(self.X, self.params, pDropHidden1, pDropHidden2)
        self.trNeqs = basicUtils.neqs(YDropProb, self.Y)
        trCrossEntropy = categorical_crossentropy(YDropProb, self.Y)
        self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))

        # Validation/test-set cost function
        YFullProb = model(self.X, self.params, 0., 0.)
        self.vateNeqs = basicUtils.neqs(YFullProb, self.Y)
        self.YPred = T.argmax(YFullProb, axis=1)
        vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y)
        self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
Example #2
def information_gain(previous_y, current_y):
    # Inputs:
    #   previous_y: the distribution of original labels (0's and 1's)
    #   current_y:  the distribution of labels after splitting based on a particular
    #               split attribute and split value

    # TODO: Compute and return the information gain from partitioning the previous_y labels
    # into the current_y labels.
    # You will need to use the entropy function above to compute information gain
    # Reference: http://www.cs.cmu.edu/afs/cs.cmu.edu/academic/class/15381-s06/www/DTs.pdf
    """
    Example:

    previous_y = [0,0,0,1,1,1]
    current_y = [[0,0], [1,1,1,0]]

    info_gain = 0.45915
    """

    info_gain = 0
    y_left = current_y[0]
    y_right = current_y[1]
    info_gain = entropy(previous_y) - (
        entropy(y_left) * (len(y_left) / len(flatten(current_y))) +
        (entropy(y_right) * (len(y_right) / len(flatten(current_y)))))

    return info_gain
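For reference, a minimal self-contained check of the docstring example above. It assumes the unspecified entropy() helper is the usual Shannon entropy of the 0/1 label proportions (that assumption is ours; the helper is not shown in this snippet):

from math import log

def _entropy(labels):
    # assumed helper: Shannon entropy (in bits) of a 0/1 label list
    if not labels:
        return 0.0
    p = float(sum(labels)) / len(labels)
    return 0.0 if p in (0.0, 1.0) else -(p * log(p, 2) + (1 - p) * log(1 - p, 2))

previous_y = [0, 0, 0, 1, 1, 1]
current_y = [[0, 0], [1, 1, 1, 0]]
n = float(len(previous_y))
info_gain = _entropy(previous_y) - sum(_entropy(part) * len(part) / n for part in current_y)
print(round(info_gain, 5))  # 0.45915, matching the docstring example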
Example #3
def node_dictionary(filename):
    global P, initPR, newPR, dictionary
    with open(filename, 'r') as f:
        for line in f:
            splitLine = line.split()
            dictionary[splitLine[0]] = splitLine[1:]

    #create a list of sinknodes
    for k in dictionary.keys():
        if k not in flatten(dictionary.values()):
            sinknodes.append(k)

    #create a list of keys of the dictionary
    for key in dictionary.keys():
        P.append(key)

    #create initial page rank for each nodes
    for p in dictionary:
        initPR[p] = (float("{0:.3f}".format(float(1) / len(P))))

    #create total outlinks for each nodes
    for k in dictionary.keys():
        ctr = flatten(dictionary.values()).count(k)
        outlinks[k] = ctr

    #call pagerank function with dictionary, and other details
    calc_page_rank(dictionary, initPR, sinknodes)
Example #4
def node_dictionary(filename):
    global P, initPR, newPR, dictionary
    with open(filename, 'r') as f:
        for line in f:
            splitLine = line.split()
            dictionary[splitLine[0]] = splitLine[1:]

    #create a list of sinknodes
    for k in dictionary.keys():
        if k not in flatten(dictionary.values()):
            sinknodes.append(k)

    #create a list of keys of the dictionary
    for key in dictionary.keys():
        P.append(key)

    #create initial page rank for each nodes
    for p in dictionary:
        initPR[p] = (float("{0:.3f}".format(float(1) / len(P))))

    #create total outlinks for each nodes
    for k in dictionary.keys():
        ctr = flatten(dictionary.values()).count(k)
        outlinks[k] = ctr

    #call pagerank function with dictionary, and other details
    calc_page_rank(dictionary, initPR, sinknodes)
Example #5
def classify(query_data, data, newMat, w):
    n, m = data.shape
    axis_x = flatten(data[0].tolist())
    axis_y = flatten(data[1].tolist())
    label = data.values[:, -1]
    color = ['b', 'r', 'g', 'y', 'c']
    # Scatter plot of the original 2-D data
    plt.subplot(2, 1, 1)
    for i in range(n):
        plt.scatter(axis_x[i], axis_y[i], c=color[int(label[i])], marker='o', s=5)
    mean_class = data.groupby([m - 1]).mean().values
    count_class = data.groupby([m - 1]).count().values[:, 0]
    w0 = sum([x*y*w for x, y in zip(mean_class, count_class)]) / n
    for dt in query_data:
        if dt * w - w0[0, 0] > 0:
            print "class1"
        else:
            print "class2"
        plt.title('raw data')
    # After the transform only one dimension remains; for readability the y-axis takes random values in [0, 1]
    plt.subplot(2, 1, 2)
    axis_x = flatten(newMat[:, 0].tolist())
    axis_y = [random.random() for _ in range(n)]
    for j in range(n):
        plt.scatter(axis_x[j], axis_y[j], c=color[int(label[j])], marker='o', s=5)
    plt.plot([w0[0, 0]] * n, axis_y)
    plt.title('new data')
    plt.show()
Example #6
def greedy_allocation(EVlist, S, endowment, t):
    #creates a multiset of reported active marginal values for active agents
    V = []
    for ev in range(len(EVlist)):
        if (EVlist[ev].arrival <= t and EVlist[ev].departure >= t):
            if (S >= EVlist[ev].consumption):
                query_cons = EVlist[ev].consumption
            else:
                query_cons = S
            V.append(EVlist[ev].marginal_values[:query_cons])
    #then updates an agents endowment
    V_help = sorted(list(flatten(V)),
                    reverse=True)  #make list of lists, flat array
    for s in range(S):  #for every unit of electricity
        if V_help:
            max_v = max(list(flatten(V_help)))  #finds max from flat array
            for ev in range(len(EVlist)):
                if max_v in EVlist[ev].marginal_values:
                    #add endowment
                    endowment[ev] += 1
                    #remove values from agent
                    V_help.pop(0)  #in case of s>1
                    EVlist[ev].marginal_values.pop(0)
    #return endowment as journal guides
    if empty(V):
        V.append([0])
    return (endowment, flatten(V))
Example #7
def returnNull(time_series):
	rank_arr = np.array(np.zeros((time_series.shape[0],time_series.shape[1]))) #copy for array dimension instantiation
	cntr=0
	for col in time_series.T:
		rank_arr[:,cntr]=rank_expressions(col); cntr+=1
	signarr = np.array(np.zeros((rank_arr.shape[0],rank_arr.shape[1]-1)))
	cntr=0
	for row in time_series:
		signarr[cntr,:] = rank_sign_diff(row); cntr+=1

	phase_arrs=[]
	for row in signarr:
		arr = calc_phases(row)
		phase_arrs.append(arr)

	phase_counts = []
	for arrs in phase_arrs:
		phase_lengths = calc_phase_lengths(arrs)
		phase_counts.append(phase_lengths)
	bins =  set(flatten(np.arange(1,time_series.shape[1],1)))
	frequencies = [];
	for arrs in phase_counts:
		freqs = calculate_frequencies(arrs,bins)
		frequencies.append(freqs)
	popdist = calculate_frequencies(flatten(phase_counts),bins)
	popdist[popdist==0] = 1e-100
	return popdist,bins
Example #8
def returnNull(time_series):
    rank_arr = np.array(np.zeros(
        (time_series.shape[0],
         time_series.shape[1])))  #copy for array dimension instantiation
    cntr = 0
    for col in time_series.T:
        rank_arr[:, cntr] = rank_expressions(col)
        cntr += 1
    signarr = np.array(np.zeros((rank_arr.shape[0], rank_arr.shape[1] - 1)))
    cntr = 0
    for row in time_series:
        signarr[cntr, :] = rank_sign_diff(row)
        cntr += 1

    phase_arrs = []
    for row in signarr:
        arr = calc_phases(row)
        phase_arrs.append(arr)

    phase_counts = []
    for arrs in phase_arrs:
        phase_lengths = calc_phase_lengths(arrs)
        phase_counts.append(phase_lengths)
    bins = set(flatten(np.arange(1, time_series.shape[1], 1)))
    frequencies = []
    for arrs in phase_counts:
        freqs = calculate_frequencies(arrs, bins)
        frequencies.append(freqs)
    popdist = calculate_frequencies(flatten(phase_counts), bins)
    popdist[popdist == 0] = 1e-100
    return popdist, bins
Example #9
        def write_data(_writer, _events, _labels, _masks, _durations):
            current_split_midi_num = len(_events)
            for k in range(current_split_midi_num):
                padded_events_flat = flatten(_events[k])
                padded_labels_flat = flatten(_labels[k])
                keys_mask_flat = flatten(_masks[k])
                durations_flat = flatten(_durations[k])

                # write one TFRecord Example per sampled (events, labels) pair
                example = tf.train.Example(features=tf.train.Features(
                    feature={
                        'events':
                        tf.train.Feature(int64_list=tf.train.Int64List(
                            value=padded_events_flat)),
                        'labels':
                        tf.train.Feature(int64_list=tf.train.Int64List(
                            value=padded_labels_flat)),
                        'keys_mask':
                        tf.train.Feature(int64_list=tf.train.Int64List(
                            value=keys_mask_flat)),
                        'durations':
                        tf.train.Feature(int64_list=tf.train.Int64List(
                            value=durations_flat)),
                    }))
                serialized = example.SerializeToString()
                _writer.write(serialized)
Example #10
    def __init__(self, fin, f1, nin1, f2, nin2, f3, nin3, expand, h1, outputs,
                 lr, C, pDropConv=0.2, pDropHidden=0.5):
        # Hyperparameters
        self.lr = lr
        self.C = C
        self.pDropConv = pDropConv
        self.pDropHidden = pDropHidden
        # Collect all parameters to be optimized (connection weights and biases) in one list
        self.params = []
        self.paramsNIN = []
        self.paramsConv = []
        # Convolutional layers: w = (feature maps in this layer, feature maps in the previous layer, kernel rows, kernel columns), b = (feature maps in this layer)
        self.paramsNIN.append(layerNINParams((f1, fin, nin1, 3, 3), expand))
        self.paramsNIN.append(layerNINParams((f2, f1 * expand, nin2, 3, 3), expand))
        self.paramsNIN.append(layerNINParams((f3, f2 * expand, nin3, 3, 3), expand))
        # Global average pooling layers
        self.paramsConv.append(layerConvParams((h1, f3 * expand, 1, 1)))
        self.paramsConv.append(layerConvParams((outputs, h1, 1, 1)))
        self.params = self.paramsNIN + self.paramsConv

        # Define Theano symbolic variables and build the Theano expressions
        self.X = T.tensor4('X')
        self.Y = T.matrix('Y')
        # Training-set cost function
        YDropProb = model(self.X, self.params, pDropConv, pDropHidden)
        self.trNeqs = basicUtils.neqs(YDropProb, self.Y)
        trCrossEntropy = categorical_crossentropy(YDropProb, self.Y)
        self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))

        # Validation/test-set cost function
        YFullProb = model(self.X, self.params, 0., 0.)
        self.vateNeqs = basicUtils.neqs(YFullProb, self.Y)
        self.YPred = T.argmax(YFullProb, axis=1)
        vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y)
        self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
Example #11
    def __init__(self, fin, h1, piece1, h2, piece2, outputs,
                 lr, C, pDropHidden1=0.2, pDropHidden2=0.5):
        # Hyperparameters
        self.lr = lr
        self.C = C
        self.pDropHidden1 = pDropHidden1
        self.pDropHidden2 = pDropHidden2
        # Collect all parameters to be optimized (connection weights and biases) in one list
        self.params = []
        hiddens = []
        pieces = []
        # Maxout layers: piece is the number of segments of the piecewise-linear activation, i.e. the number of hidden sub-layers used; dimensions match an ordinary MLP, using cross-channel max pooling
        self.params.append(layerMLPParams((fin, h1 * piece1)))
        hiddens.append(h1)
        pieces.append(piece1)
        self.params.append(layerMLPParams((h1, h2 * piece2)))
        hiddens.append(h2)
        pieces.append(piece2)
        self.params.append(layerMLPParams((h2, outputs)))

        # Define Theano symbolic variables and build the Theano expressions
        self.X = T.matrix('X')
        self.Y = T.matrix('Y')
        # Training-set cost function
        YDropProb = model(self.X, self.params, hiddens, pieces, pDropHidden1, pDropHidden2)
        self.trNeqs = basicUtils.neqs(YDropProb, self.Y)
        trCrossEntropy = categorical_crossentropy(YDropProb, self.Y)
        self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))

        # Validation/test-set cost function
        YFullProb = model(self.X, self.params, hiddens, pieces, 0., 0.)
        self.vateNeqs = basicUtils.neqs(YFullProb, self.Y)
        self.YPred = T.argmax(YFullProb, axis=1)
        vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y)
        self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
Example #12
def maptest14bus_test_system(comp_filename, start_range, contingency_range,
                             max_load_loss_contingency):
    from compiler.ast import flatten

    # Method Variables Initialization
    CMB = []
    CMB_new = []
    temp_valueset = []
    new_valueset = []
    valueset_new = []
    valueset = comp_filename
    valueset_new = list(flatten(valueset))
    iter_max_load_loss_outage = max_load_loss_contingency
    max_load_loss_outage = iter_max_load_loss_outage
    max_load_loss_outage = list(flatten(max_load_loss_outage))

    # Remove the identified transmission lines
    for elem in range(0, len(max_load_loss_outage)):
        valueset_new.remove(max_load_loss_outage[elem])
    new_valueset = [
        valueset_new[i:i + 1] for i in range(0, len(valueset_new), 1)
    ]

    # Creating a new contingency list
    for i in range(0, len(new_valueset)):
        temp_valueset = new_valueset[i]
        temp_iter_max_load_loss_outage = iter_max_load_loss_outage[0]
        iter_temp_comb = temp_valueset + temp_iter_max_load_loss_outage
        CMB.append(iter_temp_comb)
    CMB_new.append(CMB)
    return CMB_new
Example #13
def input_MI_data(filename1,filename2,filename3,labelname):
    temp1 = np.ones((36, 1))
    temp2 = np.ones((24, 1))
    temp = np.ones((60, 1))


    data1 = scio.loadmat(filename1)['ss']
    data2 = scio.loadmat(filename2)['ss']
    data3 = scio.loadmat(filename3)['ss']
    label = scio.loadmat(labelname)['label']

    label = label - temp
    '''
    Raw data dimensions: data1[20, 30720]
    Final data dimensions: data[60, 3 channels * 256 Hz * 6 s]
    '''
    data = np.zeros((60, 4608), np.float32)
    #4608 = 64*72
    for i in range(20):
        data[i] = flatten(data1[:,i*256*6:(i+1)*256*6].tolist())
        data[i+20] = flatten(data2[:,i*256*6:(i+1)*256*6].tolist())
        data[i+40] = flatten(data3[:,i*256*6:(i+1)*256*6].tolist())
    x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.4, random_state=0)
    print x_train.shape
    print x_test.shape
    print y_train.shape
    print y_test.shape
    print y_train
    return x_train,  np.transpose(y_train), x_test, np.transpose(y_test)


#input_MI_data('sxq1_Segmentation.dat.mat','sxq2_Segmentation.dat.mat','sxq3_Segmentation.dat.mat')
Example #14
def getName(node):
    if node is None: return ''
    if isinstance(node, (basestring, int, long, float)):
        return str(node)
    if isinstance(node, (ast.Class, ast.Name, ast.Function)):
        return node.name
    if isinstance(node, ast.Dict):
        pairs = ['%s: %s' % pair for pair in [(getName(first), getName(second))
                        for (first, second) in node.items]]
        return '{%s}' % ', '.join(pairs)
    if isinstance(node, ast.CallFunc):
        notArgs = [n for n in node.getChildNodes() if n not in node.args]
        return getNameTwo('%s(%s)', notArgs, node.args, rightJ=', ')
    if isinstance(node, ast.Const):
        try:
            float(node.value)
            return str(node.value)
        except:
            return repr(str(node.value))
    if isinstance(node, ast.LeftShift):
        return getNameTwo('%s<<%s', node.left, node.right)
    if isinstance(node, ast.RightShift):
        return getNameTwo('%s>>%s', node.left, node.right)
    if isinstance(node, (ast.Mul, ast.Add, ast.Sub, ast.Power, ast.Div, ast.Mod)):
        return getNameMath(node)
    if isinstance(node, ast.Bitor):
        return '|'.join(map(getName, node.nodes))
    if isinstance(node, ast.UnarySub):
        return '-%s' % ''.join(map(getName, ast.flatten(node)))
    if isinstance(node, ast.List):
        return '[%s]' % ', '.join(map(getName, ast.flatten(node)))
    if isinstance(node, ast.Tuple):
        return '(%s)' % ', '.join(map(getName, ast.flatten(node)))
    if isinstance(node, ast.Lambda):
        return 'lambda %s: %s' % (', '.join(map(getName, node.argnames)), getName(node.code))
    if isinstance(node, ast.Getattr):
        return '.'.join(map(getName, ast.flatten(node)))
    if isinstance(node, ast.Compare):
        rhs = node.asList()[-1]
        return '%s %r' % (' '.join(map(getName, node.getChildren()[:-1])), rhs.value)
    if isinstance(node, ast.Slice):
        children = node.getChildren()
        slices = children[2:]
        formSlices = []
        for sl in slices:
            if sl is None:
                formSlices.append('')
            else:
                formSlices.append(getName(sl))
        sliceStr = ':'.join(formSlices)
        return '%s[%s]' % (getName(children[0]), sliceStr)
    if isinstance(node, ast.Not):
        return "not %s" % ''.join(map(getName, ast.flatten(node)))
    if isinstance(node, ast.Or):
        return " or ".join(map(getName, node.nodes))
    if isinstance(node, ast.And):
        return " and ".join(map(getName, node.nodes))
    if isinstance(node, ast.Keyword):
        return "%s=%s" % (node.name, getName(node.expr))
    return repr(node)
Example #15
def checkMatch(tag, tagFound):
    # first, see if body text is a data field
    if len(tag.contents) == 1 and getTagText(tag).count("%") == 1:
        if len(getTagText(tagFound).strip()) <= 0:
            #print "Returning FALSE match due to empty tag text"
            return False

    # remove data attributes and check rest with found tag to see if match
    tagAttrSet = {
        x
        for x in tag.attrs if type(tag[x]) is list or (
            tag[x].find("%") < 0 and x.find("interleech") < 0)
    }
    #print "Tag Attribute Set:", tagAttrSet
    # some attributes have simple values, add them to a set
    tagValueSet = {tag[x] for x in tagAttrSet if type(tag[x]) is not list}
    #print "Tag Value Set:", tagValueSet
    tagFoundValueSet = {
        tagFound[x]
        for x in tagAttrSet if type(tagFound[x]) is not list
    }
    #print "Tag Found Value Set:", tagFoundValueSet
    # other attributes have lists of values, add them to a new list
    # then flatten and add to a set to be compared
    tagValueSet2 = set(
        flatten([tag[x] for x in tagAttrSet if type(tag[x]) is list]))
    tagFoundValueSet2 = set(
        flatten([tagFound[x] for x in tagAttrSet
                 if type(tagFound[x]) is list]))
    if tagValueSet == tagFoundValueSet and tagValueSet2 == tagFoundValueSet2:
        #print "MATCH FOUND! ", tagFound.name, " with attrs: ", tagFound.attrs , " with text: ", tagFound.get_text().strip()[:10]
        return True

    return False
Example #16
def evaluate_words(dictssentence,keyword,new=True):
    # for word !!!
    dictsword_tmp = {key: sentence_evaluate(value) for key, value in dictssentence.items() if len(sentence_evaluate(value)) > 0}
    dictsword_tmp2 = map(lambda z: z[1], dictsword_tmp.items())
    corpus = map(lambda z: " ".join(z), dictsword_tmp2)
    wordlists = top50words(corpus)
    # sentence lists
    if(new):
        if keyword[0] is None:
            words_final = map(lambda (word, importance): word, wordlists)[:100]
        else:
            dictsword_tmp3 = list(set(flatten(dictsword_tmp2)))
            word_potential = map(lambda t: (t,similar_check_higher(t, keyword)), flatten(dictsword_tmp3))
            words_2 = sorted(word_potential, key = lambda (word, score):score, reverse =True)
            words_3 = filter(lambda (key,score):score != -1,words_2)
            words_final = map(lambda (key, score): key, words_3)[:100]
        # word importance based on word2vec
        # ids:word index: order id score: vector
        word_vector_dicts = [distattr2(word, word2vec_evaluate(word)) for word in words_final if len(word2vec_evaluate(word)) != 0]
        try:
            final_list = map(lambda x: (x.ids,x.score),textrankgetter(word_vector_dicts, False))
            return final_list
        except:
            return wordlists
    else:
        return wordlists
Example #17
def page_rank(matrix, nodes, beta=0.85, epsilon=None, period=None):

    #input parameters-for testing
    #matrix=[1./2,1./2,0,1./2,0,0,0,1./2,1]
    #beta=0.8
    #nodes=3
    #epsilon=0.0001
    
    t=0
    #comprehensions
    
    teleport_matrix=[]
    teleport_matrix=[((1-beta)*1.0/nodes) for i in (range(nodes)*nodes)]
    flatten(matrix)
    matrix=[(float(i)*beta) for i in matrix]
    
    alpha=[(i+j) for i,j in zip(matrix,teleport_matrix)]
    alpha_array=numpy.matrix(alpha)
    alpha_array=alpha_array.reshape(nodes,nodes)
    
    ######################
    page_ranks=defaultdict(list)
    gen=[i for i in range(nodes)]
    
    for i in gen:
        page_ranks[i].append(0.)
        page_ranks[i].append(1./nodes)
        
    t=0
    def vectorize(page_ranks):
        vector=[]
        for i in page_ranks:
            vector.append(page_ranks[i][-1])
        c=numpy.matrix(vector).transpose()
        return c
    
    
    if epsilon is None and period is not None:
        while t<period:
            current_page_ranks=alpha_array*vectorize(page_ranks)
            for i, n in enumerate(current_page_ranks):
                page_ranks[i].append(float(n))           
            t+=1
    elif epsilon is not None and period is None:
        while float(abs(page_ranks[i][-1]-page_ranks[i][-2]))>epsilon:
            current_page_ranks=alpha_array*vectorize(page_ranks)
            for i, n in enumerate(current_page_ranks):
                page_ranks[i].append(float(n))           
            t+=1        
    else:
        print "Please enter either an Epsilon or Period parameter. Both cannot be empty and both cannot be given!"
    
    for i in page_ranks:
        print "The Page Ranks Are:",i,"   ",page_ranks[i][-1]
    print "It took %d iterations" % (t)
Example #18
def conductance(comm, hyperedges, node_tags=None):
    """Compute clustering conductance measure
    
    Parameters
    ----------
    comm: list or dict
        community list or community dictionary
    hyperedges: list of lists
        list of hyperedges
    node_tags: list of str, optional, if comm is a dictionary, specify correct order of keys
        list of partite labels, as found in the hyperedges
    
    Returns
    -------
    scores: dictionary
        conductance scores 
    """
    if isinstance(comm, list):
        comm_dict, node_tags = community_array_to_community_dict(
            comm, hyperedges)
    else:
        if not node_tags:
            node_tags = partite_order(hyperedges, comm)
        comm_dict = comm

    out_edges = {}
    degs = {}
    _he_comm_list = []
    for he in hyperedges:
        _he_comm_list = [
            comm_dict[node_tags[ind]][n] for ind, n in enumerate(he)
        ]  #numerator
        if any(isinstance(x, list) for x in _he_comm_list):  #overlapping case
            _he_comm_counts = dict(Counter(flatten(_he_comm_list)))
            for n in _he_comm_counts:
                if _he_comm_counts[n] < len(comm_dict):
                    if n not in out_edges:
                        out_edges[n] = 1
                    else:
                        out_edges[n] = 1 + out_edges[n]
        elif len(sp.unique(_he_comm_list)) > 1:  #non overlapping case
            for n in sp.unique(_he_comm_list):
                if n not in out_edges:
                    out_edges[n] = 1
                else:
                    out_edges[n] = 1 + out_edges[n]
        for n in flatten(_he_comm_list):  #denominator
            if n not in degs:
                degs[n] = 1
            else:
                degs[n] = 1 + degs[n]

    scores = {}
    for c in degs:
        other_deg = sp.array([v for k, v in degs.items() if k != c]).sum()
        scores[c] = float(out_edges[c]) / min(
            [float(degs[c]), float(other_deg)])

    return scores
Example #19
def funcgamewon():
	flattenedcompdata=flatten(compdata)
	flatteneduserdata=flatten(userdata)
	if True in flattenedcompdata or True in flatteneduserdata:
		gamewon[0]=True
		gamewon[1]=flatteneduserdata
		gamewon[2]=flattenedcompdata
	return gamewon
Example #20
def dfsForTrees(solution, tree_pos, row, lizards):
    size = len(solution)                          
    all_positions = generateLizardPositionsForTreesDfs(solution, tree_pos, row)
    stack = []
    stack += all_positions
    position = None
    if not stack:
        if getHeuristicCostForTrees(makeBoard(solution, tree_pos, 0))==0 and lizards == sum(x is not None for x in flatten(solution)):
            return True
        if row > len(board)-2:
            return False
        stack = generateLizardPositionsForTreesDfs(solution, tree_pos, row+1)
        row = row + 1
    while stack:
        position = stack.pop()
        if type(position) is int:
            position = [row, position]
        else:
            position = [row, list(position)]
        if(isValidSolutionForTrees(solution, position[0], position[1], lizards, size)):
            solution[row] = position[1]                                             #To keep the last location on board when returned otherwise no last location on board
            return True
        if(isValidPositionForTrees(solution, tree_pos, position[0], position[1])):
            if(row > len(board)-2):                                                             #To keep the dfs from going beyond the rows of the board and allow it to backtrack to another solution
                solution[position[0]] = position[1]                                                  #To remove the last location in placed of lizard
                return False
            solution[position[0]] = position[1]
            rem_placed = lizards - sum(x is not None for x in flatten(solution))
            if(maxLizards(makeBoard(solution, tree_pos, row+1)) >= rem_placed): 
                explored = dfsForTrees(solution, tree_pos, row+1, lizards)
                if explored:
                    return True
                else:
                    solution[position[0]] = None  
            else:
                solution[position[0]] = None  
                return False
        
        if(time.time()-start_time > 290):
            return False
                        
    rem_placed = lizards - sum(x is not None for x in flatten(solution))
    if(maxLizards(makeBoard(solution, tree_pos, row+1)) >= rem_placed):                     #To skip rows with no possible placement in order to try for other rows or else dfs will fail
        if(row > len(board)-2):                                                             #To keep the dfs from going beyond the rows of the board and allow it to backtrack to another solution
            if position is None:
                return False
            solution[position[0]] = None                                                    #To remove the last location in placed of lizard
            return False
        explored = dfsForTrees(solution, tree_pos, row+1, lizards)
        if explored:
                return True
        else:
            if position is not None:
                solution[position[0]] = None  
            return False
        
#    board[position[0]][position[1]] = 0
    return False
Example #21
def buildData(textFile, sYr, labelFile):

    # Make array out of text data
    path = _get_data('../../data', 'analysis')
    to_read = os.path.join(path, textFile)
    texts = loadJSON(to_read)
    tYr = []
    for slice in texts:
        tYr.extend([str(sYr)] * len(slice))
        sYr += 1
    texData = np.empty([len(tYr), 3], dtype=np.object)
    texData[:, 0] = [dlPull(texts, 'nameClean')[ii] + '_' + str(tYr[ii])
                     for ii in range(0, len(tYr))]
    texData[:, 1] = [' '.join(x) for x in dlPull(texts, 'dataClean')]
    texData[:, 2] = [len(x) for x in dlPull(texts, 'dataClean')]

    # Make array out of labeled data
    regime_path = _get_data('../../data', 'regimeData')
    labelFile = os.path.join(regime_path, labelFile)

    labList = []
    with open(labelFile, 'rU') as d:
        next(d)
        reader = csv.reader(d)
        for row in reader:
            cntryYr = row[1].lower() + '_' + str(row[2])
            labels = [row[col] for col in range(3, len(row))]
            labList.append(flatten([cntryYr, labels]))
    labData = np.array(labList)

    # Find intersections and differences
    inBoth = list(set(texData[:, 0]) & set(labData[:, 0]))
    niLab = list(set(texData[:, 0]) - set(labData[:, 0]))
    niTex = list(set(labData[:, 0]) - set(texData[:, 0]))

######
#This is where modifications have to happen depending on whether you're
#doing in-sample or true out-of-sample work. This is a difficult way to
#do this, but changing it requires changing the entirety of how the system
#works, from scrape to analysis.
######
    c = np.unique([x.split('_')[0] for x in inBoth])
    d = flatten([['{}_{}'.format(country, year) for country in c] for year in
                 tYr])
    tMatches = flatten(
        [[i for i, x in enumerate(texData[:, 0]) if x == cyr] for cyr in d])
#    tMatches = flatten(
#        [[i for i, x in enumerate(texData[:, 0]) if x == cyr] for cyr in inBoth])
#    lMatches = flatten(
#        [[i for i, x in enumerate(labData[:, 0]) if x == cyr] for cyr in inBoth])

    tlData = texData[tMatches, ]

#    tlData = np.hstack(
#        (texData[tMatches, ],
#         labData[lMatches, 1:labData.shape[1]]))
    return tlData
Example #22
def getName(node):
    if node is None: return ''
    if isinstance(node, (basestring, int, long, float)):
        return str(node)
    if isinstance(node, (ast.Class, ast.Name, ast.Function)):
        return node.name
    if isinstance(node, ast.Dict):
        pairs = [
            '%s: %s' % pair for pair in [(getName(first), getName(second))
                                         for (first, second) in node.items]
        ]
        return '{%s}' % ', '.join(pairs)
    if isinstance(node, ast.CallFunc):
        notArgs = [n for n in node.getChildNodes() if n not in node.args]
        return getNameTwo('%s(%s)', notArgs, node.args, rightJ=', ')
    if isinstance(node, ast.Const):
        try:
            float(node.value)
            return str(node.value)
        except:
            return repr(str(node.value))
    if isinstance(node, ast.LeftShift):
        return getNameTwo('%s<<%s', node.left, node.right)
    if isinstance(node, ast.RightShift):
        return getNameTwo('%s>>%s', node.left, node.right)
    if isinstance(node,
                  (ast.Mul, ast.Add, ast.Sub, ast.Power, ast.Div, ast.Mod)):
        return getNameMath(node)
    if isinstance(node, ast.Bitor):
        return '|'.join(map(getName, node.nodes))
    if isinstance(node, ast.UnarySub):
        return '-%s' % ''.join(map(getName, ast.flatten(node)))
    if isinstance(node, ast.List):
        return '[%s]' % ', '.join(map(getName, ast.flatten(node)))
    if isinstance(node, ast.Tuple):
        return '(%s)' % ', '.join(map(getName, ast.flatten(node)))
    if isinstance(node, ast.Lambda):
        return 'lambda %s: %s' % (', '.join(map(
            getName, node.argnames)), getName(node.code))
    if isinstance(node, ast.Getattr):
        return '.'.join(map(getName, ast.flatten(node)))
    if isinstance(node, ast.Compare):
        rhs = node.asList()[-1]
        return '%s %r' % (' '.join(map(getName,
                                       node.getChildren()[:-1])), rhs.value)
    if isinstance(node, ast.Slice):
        children = node.getChildren()
        return '%s[%s%s]' % (getName(children[0]), ':', children[-1].value)
    if isinstance(node, ast.Not):
        return "not %s" % ''.join(map(getName, ast.flatten(node)))
    if isinstance(node, ast.Or):
        return " or ".join(map(getName, node.nodes))
    if isinstance(node, ast.And):
        return " and ".join(map(getName, node.nodes))
    if isinstance(node, ast.Keyword):
        return "%s=%s" % (node.name, getName(node.expr))
    return repr(node)
Example #23
 def draw_matrix(figure_number):
     order_w = [[abs(w[m_row][m_col]) for m_row in flatten(row_indices)]
                for m_col in flatten(col_indices)]
     plt.figure(figure_number)
     plt.clf()
     plt.title("Weight matrix sparsification")
     plt.imshow(order_w)
     plt.colorbar(orientation='horizontal')
     plt.pause(0.001)
Example #24
def flatten(seq,container=None):
    if container is None:
        container = []
    for s in seq:
        if hasattr(s,'__iter__'):
            flatten(s,container)
        else:
            container.append(s)
    return container
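A quick usage sketch of this flatten variant: anything exposing __iter__ is recursed into, everything else is appended, and an existing container can be extended in place.

nested = [1, [2, [3, 4]], (5, 6)]
print(flatten(nested))                 # [1, 2, 3, 4, 5, 6]
print(flatten([7, 8], container=[0]))  # [0, 7, 8] -- the passed-in list is reused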
Example #25
def page_rank(matrix, nodes, beta=0.85, epsilon=None, period=None):

    #input parameters-for testing
    #matrix=[1./2,1./2,0,1./2,0,0,0,1./2,1]
    #beta=0.8
    #nodes=3
    #epsilon=0.0001

    t = 0
    #comprehensions

    teleport_matrix = []
    teleport_matrix = [((1 - beta) * 1.0 / nodes)
                       for i in (range(nodes) * nodes)]
    flatten(matrix)
    matrix = [(float(i) * beta) for i in matrix]

    alpha = [(i + j) for i, j in zip(matrix, teleport_matrix)]
    alpha_array = numpy.matrix(alpha)
    alpha_array = alpha_array.reshape(nodes, nodes)

    ######################
    page_ranks = defaultdict(list)
    gen = [i for i in range(nodes)]

    for i in gen:
        page_ranks[i].append(0.)
        page_ranks[i].append(1. / nodes)

    t = 0

    def vectorize(page_ranks):
        vector = []
        for i in page_ranks:
            vector.append(page_ranks[i][-1])
        c = numpy.matrix(vector).transpose()
        return c

    if epsilon is None and period is not None:
        while t < period:
            current_page_ranks = alpha_array * vectorize(page_ranks)
            for i, n in enumerate(current_page_ranks):
                page_ranks[i].append(float(n))
            t += 1
    elif epsilon is not None and period is None:
        while float(abs(page_ranks[i][-1] - page_ranks[i][-2])) > epsilon:
            current_page_ranks = alpha_array * vectorize(page_ranks)
            for i, n in enumerate(current_page_ranks):
                page_ranks[i].append(float(n))
            t += 1
    else:
        print "Please enter either an Epsilon or Period parameter. Both cannot be empty and both cannot be given!"

    for i in page_ranks:
        print "The Page Ranks Are:", i, "   ", page_ranks[i][-1]
    print "It took %d iterations" % (t)
Example #26
def mdoc_compile_unit((cu, die)):
	notOrdered = [child for child in die.iter_children()]

	# all subprogram which is a kind of function should be placed at bottom
	children = flatten(splitBy(isTagNotSubprogram, notOrdered))
	# [M doc]
	definitions = [mdoc((cu, child)) for child in children]
	# (a -> b) -> F  a -> F b
	return fmap(lambda xs: flatten(intersperse(xs, P.newline())), 
				sequence(definitions))
Example #27
 def func(label):
     if label == 'tminplot':
         tmin_plot.set_visible(not tmin_plot.get_visible())
         for i in flatten(tmin_error):
             if i:
                 i.set_visible(not i.get_visible())
     elif label == 'emass':
         for i in flatten(emass_plot):
             if i:
                 i.set_visible(not i.get_visible())
     plt.draw()
Example #28
def get_consts(clause):    
    words_pattern = '[A-Za-z0-9_]+' 
    functs_pattern = '(\w+)\s*\('
    if isinstance(clause,list):
        words = flatten([find(x,words_pattern) for x in clause])
        functors = flatten([find(x,functs_pattern) for x in clause]) 
    if isinstance(clause,basestring):
        words = find(clause,words_pattern) 
        functors = find(clause,functs_pattern) 
    consts = [x for x in words if not x in functors]    
    return tuple(consts)
Example #29
def get_consts(clause):
    words_pattern = "[A-Za-z0-9_]+"
    functs_pattern = "(\w+)\s*\("
    if isinstance(clause, list):
        words = flatten([find(x, words_pattern) for x in clause])
        functors = flatten([find(x, functs_pattern) for x in clause])
    if isinstance(clause, basestring):
        words = find(clause, words_pattern)
        functors = find(clause, functs_pattern)
    consts = [x for x in words if not x in functors]
    return tuple(consts)
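A rough illustration of the intent, assuming the undefined find(text, pattern) helper is a thin wrapper around re.findall (that assumption is ours, not part of the original code):

import re

def find(text, pattern):
    # assumed helper: the original 'find' is not shown in these examples
    return re.findall(pattern, text)

print(get_consts("parent(tom, bob)"))  # ('tom', 'bob') -- 'parent' is dropped because it matches the functor pattern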
Example #30
        def longest_common_string(str1, str2):
                break_strings = lambda __str: flatten([[e[0:i+1] for i in range(0, len(e))] for e in __str.split(" ")])

                common_strings = len(list(set.intersection(set(break_strings(str1)), set(break_strings(str2)))))

                min_length = min(sum([len(e) for e in str1.split(" ")]), sum([len(e) for e in str2.split(" ")]))
                max_length = max(sum([len(e) for e in str1.split(" ")]), sum([len(e) for e in str2.split(" ")]))
                if min_length == min([len(e) for e in flatten([str1.split(" "), str2.split(" ")])]):
                        return float(common_strings)/max_length
                return float(common_strings)/min_length
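A tiny worked example of the score computed above (assuming a flatten helper such as Example #24 is in scope and the function is callable at module level): each word is expanded into its prefixes, the shared prefixes are counted, and the count is normalised by a total word length.

# break_strings("ab cd") -> ["a", "ab", "c", "cd"]; break_strings("ab") -> ["a", "ab"]
# 2 shared prefixes, normalised here by the larger word length 4
print(longest_common_string("ab cd", "ab"))  # 0.5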
Example #31
    def __init__(self, sent_sentiment_nps, sentences, __eatery_name):

        if __eatery_name:
            self.list_to_exclude = flatten([
                "food", "service", "cost", "ambience", "place", "Place", "i",
                "great", "good",
                __eatery_name.lower().split(), "rs", "delhi", "india", "indian"
            ])
            #self.list_to_exclude = ["food", "service", "cost", "ambience", "delhi", "Delhi",
            #       "place", "Place", __eatery_name.lower().split()]
        else:
            self.list_to_exclude = [
                "food", "i", "service", "cost", "ambience", "delhi", "Delhi",
                "place", "Place", "india", "indian"
            ]

        self.sentences = sentences
        self.sent_sentiment_nps = sent_sentiment_nps
        self.merged_sent_sentiment_nps = self.merge_similar_elements()

        print self.sentences[0:2],
        print self.sent_sentiment_nps[0:2]
        assert(set(Counter(self.merged_sent_sentiment_nps.keys()).values()) == {1}),\
                            "merge_similar_elements method has an error as all the keys are not unique"
        new_list = list()

        __sorted = sorted(self.merged_sent_sentiment_nps.keys())
        self.list_to_exclude = flatten(self.list_to_exclude)
        #self.NERs = self.ner()

        self.keys = self.merged_sent_sentiment_nps.keys()

        self.clusters = list()
        self.result = list()

        self.filter_clusters()

        #The noun phrases who were not at all in the self.clusters
        self.without_clusters = set.difference(set(range(0, len(self.keys))),
                                               set(flatten(self.clusters)))

        self.populate_result()

        self.common_ners = list(
            set.intersection(set([e[0] for e in self.ner()]),
                             set([e[0] for e in self.custom_ner()])))

        self.result = self.filter_on_basis_pos_tag()

        self.result = sorted(self.result,
                             reverse=True,
                             key=lambda x: x.get("positive") + x.get(
                                 "negative") + x.get("neutral"))
Example #32
def getName(node):
    if node is None:
        return ""
    if isinstance(node, (basestring, int, long, float)):
        return str(node)
    if isinstance(node, (ast.Class, ast.Name, ast.Function)):
        return node.name
    if isinstance(node, ast.Dict):
        pairs = ["%s: %s" % pair for pair in [(getName(first), getName(second)) for (first, second) in node.items]]
        return "{%s}" % ", ".join(pairs)
    if isinstance(node, ast.CallFunc):
        notArgs = [n for n in node.getChildNodes() if n not in node.args]
        return getNameTwo("%s(%s)", notArgs, node.args, rightJ=", ")
    if isinstance(node, ast.Const):
        try:
            float(node.value)
            return str(node.value)
        except:
            return repr(str(node.value))
    if isinstance(node, ast.LeftShift):
        return getNameTwo("%s<<%s", node.left, node.right)
    if isinstance(node, ast.RightShift):
        return getNameTwo("%s>>%s", node.left, node.right)
    if isinstance(node, (ast.Mul, ast.Add, ast.Sub, ast.Power, ast.Div, ast.Mod)):
        return getNameMath(node)
    if isinstance(node, ast.Bitor):
        return "|".join(map(getName, node.nodes))
    if isinstance(node, ast.UnarySub):
        return "-%s" % "".join(map(getName, ast.flatten(node)))
    if isinstance(node, ast.List):
        return "[%s]" % ", ".join(map(getName, ast.flatten(node)))
    if isinstance(node, ast.Tuple):
        return "(%s)" % ", ".join(map(getName, ast.flatten(node)))
    if isinstance(node, ast.Lambda):
        return "lambda %s: %s" % (", ".join(map(getName, node.argnames)), getName(node.code))
    if isinstance(node, ast.Getattr):
        return ".".join(map(getName, ast.flatten(node)))
    if isinstance(node, ast.Compare):
        rhs = node.asList()[-1]
        return "%s %r" % (" ".join(map(getName, node.getChildren()[:-1])), rhs.value)
    if isinstance(node, ast.Slice):
        children = node.getChildren()
        return "%s[%s%s]" % (getName(children[0]), ":", children[-1].value)
    if isinstance(node, ast.Not):
        return "not %s" % "".join(map(getName, ast.flatten(node)))
    if isinstance(node, ast.Or):
        return " or ".join(map(getName, node.nodes))
    if isinstance(node, ast.And):
        return " and ".join(map(getName, node.nodes))
    if isinstance(node, ast.Keyword):
        return "%s=%s" % (node.name, getName(node.expr))
    return repr(node)
Example #33
def RunImportedModel(order, cog_p_r, inh_p_r, input_X):
    shape_v = input_X.shape
    n = shape_v[0]
    cog_p_r_l = flatten(cog_p_r.tolist())
    inh_p_r_l = flatten(inh_p_r.tolist())
    inp_l = flatten(input_X[0].tolist())
    rans = ao.get_result(cp.n_id, order, inp_l, inh_p_r_l, cog_p_r_l)
    for i in range(n):
        if i == 0:
            continue
        inp_l = flatten(input_X[i].tolist())
        tans = ao.get_result(cp.n_id, order, inp_l, inh_p_r_l, cog_p_r_l)
        rans = np.row_stack((rans, tans))
    return np.mat(rans)
Example #34
    def __init__(self, fin, f1, piece1, f2, piece2, f3, piece3, h1, pieceh1, h2, pieceh2, outputs,
                 lr, C, pDropConv=0.2, pDropHidden=0.5):
        # Hyperparameters
        self.lr = lr
        self.C = C
        self.pDropConv = pDropConv
        self.pDropHidden = pDropHidden
        # Collect all parameters to be optimized (connection weights and biases) in one list
        self.params = []
        self.paramsCNN = []
        self.paramsMLP = []
        mapunits = []
        pieces = []
        # Convolutional layers: w = (feature maps in this layer, feature maps in the previous layer, kernel rows, kernel columns), b = (feature maps in this layer)
        self.paramsCNN.append(layerCNNParams((f1 * piece1, fin, 3, 3)))  # conv: (32, 32) pool: (16, 16)
        mapunits.append(f1)
        pieces.append(piece1)
        self.paramsCNN.append(layerCNNParams((f2 * piece2, f1, 3, 3)))  # conv: (16, 16) pool: (8, 8)
        mapunits.append(f2)
        pieces.append(piece2)
        self.paramsCNN.append(layerCNNParams((f3 * piece3, f2, 3, 3)))  # conv: (8, 8) pool: (4, 4)
        mapunits.append(f3)
        pieces.append(piece3)
        # Fully connected layers; the neuron count of the last convolutional layer is needed as the MLP input size
        self.paramsMLP.append(layerMLPParams((f3 * 4 * 4, h1 * pieceh1)))
        mapunits.append(h1)
        pieces.append(pieceh1)
        self.paramsMLP.append(layerMLPParams((h1, h2 * pieceh2)))
        mapunits.append(h2)
        pieces.append(pieceh2)
        self.paramsMLP.append(layerMLPParams((h2, outputs)))
        self.params = self.paramsCNN + self.paramsMLP

        # Define Theano symbolic variables and build the Theano expressions
        self.X = T.tensor4('X')
        self.Y = T.matrix('Y')
        # Training-set cost function
        YDropProb = model(self.X, self.params, mapunits, pieces, pDropConv, pDropHidden)
        self.trNeqs = basicUtils.neqs(YDropProb, self.Y)
        trCrossEntropy = categorical_crossentropy(YDropProb, self.Y)
        self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))

        # Validation/test-set cost function
        YFullProb = model(self.X, self.params, mapunits, pieces, 0., 0.)
        self.vateNeqs = basicUtils.neqs(YFullProb, self.Y)
        self.YPred = T.argmax(YFullProb, axis=1)
        vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y)
        self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
Example #35
 def visit_BY(self, node, visited_children):
     children = flatten(visited_children)
     if isinstance(children[0], Gensuha):
         value = children # ybu = Y + BU
     else:
         value = Cmavo("".join(children), node.expr_name)
     return value
Example #36
def getVpt(v, k):
    v = abs(v)
    vList = v.tolist()
    vList = flatten(vList)
    vList.sort()
    k = (int)(k * len(vList))
    return vList[k]
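A small usage sketch (assuming numpy is imported as np and a flatten helper such as Example #24 is in scope): getVpt returns the value at the k-fraction position of the sorted absolute entries, e.g. as a pruning threshold.

import numpy as np

v = np.matrix([[0.5, -2.0], [1.5, -0.1]])
print(getVpt(v, 0.75))  # sorted |v| is [0.1, 0.5, 1.5, 2.0]; index int(0.75 * 4) = 3 -> 2.0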
Example #37
    def get_index_pages_links_list(self, base_url):
        logging.info("start page:%s" % base_url)
        try:
            f = urllib2.urlopen(base_url)
            web_str = f.read()
            soup = BeautifulSoup(web_str, from_encoding="unicode")

            urls_bs4_tage_list = soup.find("div", "page").find_all("a")
            urls_tag_str_list = map(lambda url_bs4_tag: str(url_bs4_tag),
                                    urls_bs4_tage_list)

            logging.info("urls_tag_str_list:%s" % urls_tag_str_list)
            logging.info("type(urls_tag_str_list):%s" %
                         type(urls_tag_str_list))
            logging.info("urls_tag_str_list[0]:%s" % urls_tag_str_list[0])
            logging.info("type(urls_tag_str_list[0]):%s" %
                         type(urls_tag_str_list[0]))

            raw_urls_str_2d_list = map(
                lambda url_tag_str: re.findall(r'href="(.*)" title',
                                               url_tag_str), urls_tag_str_list)
            pages_links_list = list(set(flatten(raw_urls_str_2d_list)))
            pages_links_list.append(base_url)
        except Exception as e:
            logging.error(e)

        return pages_links_list
Example #38
def merge_and_write_output(pos_dict,seq_strandness_dict,seq_QNAME_dict,seq_RNAME_dict,outpath):
    """
    Function: write the output file.
    Input: four dicts (pos_dict, seq_strandness_dict, seq_QNAME_dict, seq_RNAME_dict)
           and the output path.
    """
    global merge_read
    f=open(outpath,"w")
    for seq in seq_strandness_dict:
        strandness=seq_strandness_dict[seq]
        f.write("\t".join([seq_QNAME_dict[seq],strandness]))
        f.write("\n")
        f.write("position in hg19\tRNA sequence")
        f.write("\n")
        l_big_read=flatten([pos_dict[RNAME]  for RNAME in seq_RNAME_dict[seq] if RNAME in pos_dict])
        l_big_read=list(set(l_big_read))
        l_big_read=[x for x in l_big_read if return_chr_len(x)<500]
        print l_big_read
        l_merge_read=merge_read(l_big_read)
        sequence_for_test="NA"
        for pos in l_merge_read:
            if strandness=="+":
                sequence_for_test=T2U(get_seq(pos))
            elif strandness=="-":
                sequence_for_test=T2U(rc(get_seq(pos)))
            f.write("\t".join([pos,sequence_for_test]))
            f.write("\n")
    f.close()
Example #39
def naive_most_used_word(raw_company_names):
	max_words = int(round(np.mean(map(lambda company_name: len(company_name.split()), raw_company_names))))
	words = flatten(map(lambda company_name: company_name.split(), raw_company_names))
	counter = collections.Counter(words)
	first_max_words_names = (itertools.islice(counter.most_common(), 0, max_words))
	unified_name = " ".join(["".join(name[0]) for name in first_max_words_names])
	return create_unified_map(raw_company_names, unified_name)
Example #40
 def get_pulled_images(self, docker_host):
     get_images_url = self.__get_vm_url(docker_host) + "/images/json?all=0"
     current_images_info = json.loads(
         requests.get(get_images_url).content)  # [{},{},{}]
     current_images_tags = map(lambda x: x['RepoTags'],
                               current_images_info)  # [[],[],[]]
     return flatten(current_images_tags)  # [ imange:tag, image:tag ]
Example #41
    def composeFeatures(self):
        alldata = []
        labeling = []
        unpr = len(distances)
        print "complete data length", unpr
        for i in range(unpr):

            di = distances[i]
            vfi = fingers[i]
            vhi = hand[i]
            labeli = labels[i]
            assert self.equal_ts(map(self.get_ts, [vfi, vhi, labeli]))

            labeli = self.get_feature(labeli)
            labeli = map(int, labeli)

            features = map(self.get_feature, [di, vfi, vhi])

            if not (True in map(self.empty, features)) and \
               not (self.contains_class(0, labeli)) and \
               not (self.contains_class(15, labeli)):
                features = flatten(features)
                labeli = self.labelFingers(labeli)
                labeling += [labeli]
                alldata += [map(float, features)]

        X = np.array(alldata)
        Y = np.array(labeling)

        return X, Y
Example #42
def get_vars(clause):
    var_pattern = "\W[A-Z][A-Za-z0-9_]+|\W[A-Z]"
    if isinstance(clause, list):
        _vars = set(flatten([find(x, var_pattern) for x in clause]))
    if isinstance(clause, basestring):
        _vars = set(find(clause, var_pattern))
    return tuple(_vars)
Example #43
	def fts_async_product(self,fts2):
		assert type(fts2) == FTS
		result =  FTS(set([]),set([])) 
		
		nodes_prod = lambda u,v: tuple((list(u) if self.size>1 else [u]) + (list(v) if fts2.size>1 else [v]))
		labels_prod = lambda u,v: flatten((u,v))

		labels = nx.get_node_attributes(self,'label')
		labels_fts2 = nx.get_node_attributes(fts2,'label')

		for u in self.nodes():
			for v in fts2.nodes():
				result.add_node(nodes_prod(u,v),label=labels_prod(labels[u],labels_fts2[v]),weight=1.0)
		for (u,v) in self.edges():
			for (x,y) in fts2.edges():
				result.add_edge(nodes_prod(u,x),nodes_prod(u,y),weight=1.0)
				result.add_edge(nodes_prod(u,x),nodes_prod(v,x),weight=1.0)
				result.add_edge(nodes_prod(u,x),nodes_prod(v,y),weight=1.0)

		for u in self.graph['initial']:
			for v in fts2.graph['initial']:
				result.graph['initial'].add(nodes_prod(u,v))
		
		for u in self.graph['symbols']:
			for v in fts2.graph['symbols']:
				result.graph['symbols'].add(nodes_prod(u,v))

		result.size = self.size + fts2.size

		return copy.deepcopy(result)
Example #44
    def constructFeatureVector(self, featureDictObjectA, featureDictObjectB,
                               selectedFeatures):
        assert ("Global<Maximum >" not in selectedFeatures)
        assert ("Global<Minimum >" not in selectedFeatures)
        assert ("Histrogram" not in selectedFeatures)
        assert ("Polygon" not in selectedFeatures)

        features = []

        for key in selectedFeatures:
            if key == 'RegionCenter':
                continue
            else:
                if not isinstance(
                        featureDictObjectA[key],
                        np.ndarray) or featureDictObjectA[key].size == 1:
                    features.append(
                        float(featureDictObjectA[key]) -
                        float(featureDictObjectB[key]))
                else:
                    features.extend(flatten((featureDictObjectA[key].astype('float32') \
                                             - featureDictObjectB[key].astype('float32')).tolist()))

        # there should be no nans or infs
        assert (np.all(np.isfinite(np.array(features))))

        return features
Example #45
def generate_dict(df, columns):
    def convert_row(x):
        ret = []
        for feat_name, feat_values in x.asDict().items():
            if not feat_values: continue
            for feat_value in str(feat_values).split(','):
                ret.append((feat_name, feat_value))
        return ret

    ## operate DataFrame by rdd
    # select column
    df = df[columns]
    # convert Row to tuple, and flat them
    rdd = df.rdd.flatMap(convert_row)
    # stat frequency
    rdd = rdd.map(lambda x: (x, 1)).reduceByKey(lambda x, y: x + y)
    #rdd = rdd.map(lambda x: '\t'.join([str(e) for e in flatten(x)]))
    rdd = rdd.map(lambda x: flatten(x))
    print("dict rdd:", rdd)
    input_schema = StructType()
    input_schema.add(StructField("feature", StringType(), True))
    input_schema.add(StructField("value", StringType(), True))
    input_schema.add(StructField("count", StringType(), True))
    df = sqlContext.createDataFrame(rdd, input_schema)
    return df
Example #46
def find_axes2(nodeSet, candidates):
    """
    Returns all possible coherent axes
    :param nodeSet: Current node ([ballot_set], upper_bound)
    :param candidates: List of candidates
    :return: List of possible axes, False if none found
    """
    L = []
    ballots = transform_ballots(nodeSet)
    # Group together the ballots that contain candidate c
    for c in candidates:
        S = Set([])
        for ballot in ballots:
            if c in ballot:
                S += Set([ballot])
        S += Set([Set([c])])
        L += Set([S])

    # Turn the list of Sets into a PQ-tree and align it
    axes = P(L)
    for ballot in ballots:
        try:
            axes.set_contiguous(ballot)
        except:
            return False, 0
    # Determine the axes from the alignments found
    all_axes = [] # List of axes
    for axis in axes.orderings():
        A = []
        for ballot_set in flatten(axis):
            A += [L.index(ballot_set)+1]
        all_axes += [A]
    axes_filtered = filter_symmetric_axes(all_axes)
    return axes_filtered, axes.cardinality()
Example #47
def send_frequency_reminder(self):
    # We exclude irrelevant frequencies.
    frequencies = [f for f in UPDATE_FREQUENCIES.keys()
                   if f not in ('unknown', 'realtime', 'punctual')]
    now = datetime.now()
    reminded_orgs = {}
    reminded_people = []
    allowed_delay = current_app.config['DELAY_BEFORE_REMINDER_NOTIFICATION']
    for org in Organization.objects.visible():
        outdated_datasets = []
        for dataset in Dataset.objects.filter(
                frequency__in=frequencies, organization=org).visible():
            if dataset.next_update + timedelta(days=allowed_delay) < now:
                dataset.outdated = now - dataset.next_update
                dataset.frequency_str = UPDATE_FREQUENCIES[dataset.frequency]
                outdated_datasets.append(dataset)
        if outdated_datasets:
            reminded_orgs[org] = outdated_datasets
    for reminded_org, datasets in reminded_orgs.iteritems():
        print(u'{org.name} will be emailed for {datasets_nb} datasets'.format(
              org=reminded_org, datasets_nb=len(datasets)))
        recipients = [m.user for m in reminded_org.members]
        reminded_people.append(recipients)
        subject = _('You need to update some frequency-based datasets')
        mail.send(subject, recipients, 'frequency_reminder',
                  org=reminded_org, datasets=datasets)

    print('{nb_orgs} orgs concerned'.format(nb_orgs=len(reminded_orgs)))
    reminded_people = flatten(reminded_people)
    print('{nb_emails} people contacted ({nb_emails_twice} twice)'.format(
        nb_emails=len(reminded_people),
        nb_emails_twice=len(reminded_people) - len(Set(reminded_people))))
    print('Done')
Example #48
def split_list(filename):
    l=[]
    flatlist = []
    fileold = open(path + filename).readlines()
    fileold.pop(0)
    fileold.pop(len(fileold)-1)
    #split date, time, id
    for line in fileold:
        #filter lines not starting with date
        if re.match(r"^\d{2}.\d{2}.\d{4}\s\d{2}:\d{2}:\d{2}:",line):
            l.append(line.split(' ', 3))
        else:
            if("has been installed" in line):
                notify.append(line)
            #if lines not starting with date, append to other list to
            #print these messages at the end of the new file
            other_messages.append(line)
    #split time
    for entry in l:
        entry[1]=entry[1].split(':',3)
    #flatten time sublist 
    for i in l:
        flatlist.append(flatten(i))
    #os.remove(path + filename)
    return flatlist
Example #49
def split_list(filename):
    l = []
    flatlist = []
    fileold = open(path + filename).readlines()
    fileold.pop(0)
    fileold.pop(len(fileold) - 1)
    #split date, time, id
    for line in fileold:
        #filter lines not starting with date
        if re.match(r"^\d{2}.\d{2}.\d{4}\s\d{2}:\d{2}:\d{2}:", line):
            l.append(line.split(' ', 3))
        else:
            if ("has been installed" in line):
                notify.append(line)
            #if lines not starting with date, append to other list to
            #print these messages at the end of the new file
            other_messages.append(line)
    #split time
    for entry in l:
        entry[1] = entry[1].split(':', 3)
    #flatten time sublist
    for i in l:
        flatlist.append(flatten(i))
    #os.remove(path + filename)
    return flatlist
def process_data_count(file_list, path, l):

    data = []
    for file in file_list:
        print('Processing data: ', file)
        data_temp = []
        file_object = open(path+file+'.txt')
        file_name = file_object.readlines()
        file_name = [s.strip('\r\n') for s in file_name]

        del file_name[0]

        # Split the CDR3s with spaces
        for fileline in file_name:
            s = fileline.split(',')
            s = (s[0]+' ')*int(s[1])
            s = s.split(' ')
            data_temp.append(s[:-1])

        data_temp = flatten(data_temp)

        # Randomly select l CDR3s
        if l != 'all':
            random.shuffle(data_temp, random.random)
            data_temp = data_temp[:l]

        data.append(' '.join(chain(data_temp)))

    return data
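A short note on the input format this loop assumes; the sample line is illustrative, not from the source.
# Each data file is expected to hold 'CDR3_sequence,count' lines after a one-line
# header, e.g. 'CASSLGQAYEQYF,3'; the loop expands every line into int(count)
# copies of the sequence before flattening and (optionally) sampling l of them.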
def pHash(imgfile):
    # Load the image and resize it to a 32x32 grayscale image
    img = cv2.imread(imgfile, 0)
    img = cv2.resize(img, (32, 32), interpolation=cv2.INTER_CUBIC)

    # Create a 2D array
    h, w = img.shape[:2]
    vis0 = np.zeros((h, w), np.float32)
    vis0[:h, :w] = img  # fill with the image data

    # 2D DCT transform
    vis1 = cv2.dct(cv2.dct(vis0))
    # take the top-left 8x8 block
    vis1 = vis1[0:8, 0:8]

    # turn the 2D list into a 1D list
    img_list = flatten(vis1.tolist())

    # compute the mean
    avg = sum(img_list) * 1. / len(img_list)
    avg_list = ['0' if i < avg else '1' for i in img_list]

    # build the hash string
    return ''.join([
        '%x' % int(''.join(avg_list[x:x + 4]), 2) for x in range(0, 8 * 8, 4)
    ])
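pHash values like the one returned above are typically compared with a bitwise Hamming distance. The helper below is a small companion sketch, not part of the original example; the similarity threshold in the usage comment is an illustrative assumption.
def phash_hamming_distance(hash1, hash2):
    # Each hex digit encodes 4 bits of the 64-bit hash; compare bit by bit.
    b1 = bin(int(hash1, 16))[2:].zfill(len(hash1) * 4)
    b2 = bin(int(hash2, 16))[2:].zfill(len(hash2) * 4)
    return sum(c1 != c2 for c1, c2 in zip(b1, b2))

# Hypothetical usage: treat images as similar when few bits differ.
# if phash_hamming_distance(pHash('a.png'), pHash('b.png')) <= 5:
#     print('images look similar')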
Example #52
0
def email_prob(path_to_text_file, ham_probs, spam_probs):    
    prob_list = [] #list of probabilities that will be factored
    doc_list = [] #will be used to create list of words in doc
    with open(str(path_to_text_file)) as current_file: #open text file
        qlist = [item.split() for item in current_file] #break into lists
        qlist = flatten(qlist) # correct for newline
        qlist = [re.sub(r'[^A-Za-z0-9]+', '', x) for x in qlist] #punctuation 
        qlist = filter(None, qlist) #remove blank space entry
        for word in qlist: #iterate through words in list
            if word.lower() not in doc_list: #check if word is already in file
                doc_list.append(word.lower()) #add word to file's list of words
    #now doc_list is a list of words in the document
    #establish our key values for bayes calculation
    prob_ham = float(len(ham_probs)/float((len(ham_probs) + len(spam_probs))))
    prob_spam = 1-prob_ham
    spam_bayes = []
    for word in doc_list: #iterate through terms in our email
        prob_word_in_spam = 0
        prob_word_in_ham = 0
        for k,v in spam_probs.iteritems(): #iterate through our spam probs
            if k == word and word != 'subject':
                prob_word_in_spam += float(spam_probs[k]) #assign values
        for k,v in ham_probs.iteritems(): #iterate through our ham probs
            if k == word and word != 'subject':
                prob_word_in_ham = float(ham_probs[k]) #assign values
        if prob_word_in_spam > 0.0:
            prob_word = float((prob_word_in_spam * prob_spam) + (prob_word_in_ham * prob_ham))
            prob_spam_given_word = float((prob_word_in_spam * prob_spam) / (prob_word))
            spam_bayes.append(float((prob_spam_given_word * math.log(prob_word/(1-prob_word))) + math.log(1-prob_word)))
    
    log_prob_email_is_spam = 0
    for val in spam_bayes:
        log_prob_email_is_spam += val
    return math.exp(log_prob_email_is_spam)
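A hedged usage sketch for the function above; the file path, the probability dictionaries (built elsewhere in the project) and the 0.5 cut-off are assumptions, not taken from the source.
# ham_probs / spam_probs are assumed to map lowercase words to per-class probabilities.
# score = email_prob('mail/example_email.txt', ham_probs, spam_probs)
# label = 'spam' if score > 0.5 else 'ham'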
def append_data(src, dest, classes):
    """
    read class c from src, append result to dest
    :param src: url
    :param dest: url
    :param classes: classes a list
    :return:
    """
    for c in classes:
        if c not in EMOTION_CLASS.keys():
            raise ValueError("%s is not support class" % c)

    src_tree = None
    dest_tree = None
    try:
        src_tree = ET.parse(src)
        dest_tree = ET.parse(dest)
    except IOError:
        print "cannot parse file"
        exit(-1)

    if src_tree and dest_tree:
        src_root = src_tree.getroot()
        dest_root = dest_tree.getroot()

        l = [src_root.findall("weibo[@emotion-type='%s']" % c) for c in classes]
        l = flatten(l)
        random.shuffle(l)

        [dest_root.append(l1) for l1 in l]

        # write to file
        dest_tree.write(dest, encoding="utf-8")

        print "append data is done."
Example #54
0
    def __init__(self, chords, title = 'generated by walkingbass.py', author = 'Paul Chambers'):
        self.author = author
        self.title = title
        self.chords = chords
        self.chordsd = []

        self.i.name = "Double Bass"
        self.i.clef = "bass"
        self.i.set_range((Note('C-0'), Note('F-6')))

        self.bassline = self._realbook(self.chords)
        self.bassline = flatten(self.bassline)
        longest_common = longest_duplicate_substring(self.bassline)
        print longest_common  # TODO: IMPLEMENT THIS
        return
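        # NOTE: the early return above makes the calls below unreachable as written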
        self._naive()
        self._flow()


        self.track = self._create_track()
        self.track.add_chords(self.chords)
        
        self.to_png()
        self.to_midi()
        self.to_mp3()
def getIdenticalFiles():
    AllFilesList = []
    for dirname, dirnames, filenames in os.walk('./'):
        for file in filenames:
            filePath = os.path.join(dirname, file)
            AllFilesList.append(filePath)

    finalFileList = flatten(AllFilesList)
    identicalFileList = []
    for i in range(0, len(finalFileList)):
        x = finalFileList[i]
        for j in range(i, len(finalFileList)):
            y = finalFileList[j]
            if x != y:
                # shell out to diff; identical files produce empty output
                cmd = "diff" + " " + x + " " + y
                result = commands.getoutput(cmd)
                if result == "" or result is None:
                    identicalFileList.append((x, y))
    if len(identicalFileList) != 0:
        print "List of identical files in the current directory:\n", identicalFileList
    else:
        print "There are no identical files in the current directory"
    print "\n\n"
Example #56
0
    def generic_visit(self, node, visited_children):
        if node.expr_name and is_selmaho_expression(node.expr_name):
            return Cmavo(lerpoi(visited_children), node.expr_name)
        elif node.node_type() in (LITERAL, REGEX):
            return node.text
        else:
            return flatten(visited_children)
Example #57
0
    def regular_jobdesc(self):
        if self.jdstr:
            jobDesc = self.jdstr.find("div", "job_request").find_all("p")
            # case: no <p> tags were found
            if not jobDesc:
                if self.jdstr.find("div", "job_request").find("ul"):
                    jobDesc = self.jdstr.find("div", "job_request").get_text().split("\n")

            # case: <p> tags were found
            else:
                jobDesc = [i.get_text().strip()
                           if i.get_text().find("\n") == -1 and i.get_text().strip()
                           else i.get_text().split("\n")
                           for i in jobDesc]
                jobDesc = flatten(jobDesc)

                self.jdJob[self.jobName]["jobDesc"] = u'\n'.join(jobDesc)
                if len(jobDesc)==1:
                    jobDesc = jobDesc[0]
                    jobDesc = '\n'.join(filter(None,re.split(u";|:|。| ", jobDesc)))
                if type(jobDesc).__name__=="unicode":
                    jobDesc = jobDesc.split('\n')
            '''
            Scan the description line by line.
            self.flag is a marker used mainly to handle cases such as:
                Work address:
                XX Road, Shenzhen, Guangdong
            self.extra_info takes four parameters:
                line: a line of text (without tag information)
                idx: the index of that line
                add_desc: whether to add the line to the job description
                clean_major: whether to clear the previously collected major/skill information
            '''
            self.flag = None
            for idx,line in enumerate(jobDesc):
                # print line+"Z"*50
                self.extra_info(line,idx,add_desc=False)
Example #58
0
def stringify_affiliation_rec(node):
    """
    Flatten and join list to string
    ref: http://stackoverflow.com/questions/2158395/flatten-an-irregular-list-of-lists-in-python
    """
    parts = recur_children(node)
    return " ".join(flatten(parts)).strip()
Example #59
0
def getVpt(v, k):
    v = abs(v)
    vList = v.tolist()
    vList = flatten(vList)
    vList.sort()
    k = int(k * len(vList))
    return vList[k]
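A worked example of what getVpt computes, assuming v is a NumPy array and 0 <= k < 1; the input values are illustrative.
# import numpy as np
# getVpt(np.array([[1, -4], [2, 3]]), 0.5)
# abs(v) -> [[1, 4], [2, 3]]; flattened and sorted -> [1, 2, 3, 4];
# index int(0.5 * 4) == 2, so the call returns 3.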
Example #60
0
    def main(self):
        """docstring for main"""
        # fname = '/home/gold/Documents/ICS/Home.ics'
        args = parse_args()
        fname = args.calendar_file
        self.output_format = args.output_format
        self.display_header = args.display_header
        self.start = args.date
        self.start = utc.localize(self.start)
        self.stop = args.date + datetime.timedelta(args.num_days)
        self.stop = utc.localize(self.stop)

        cal = icalendar.Calendar.from_ical(open(fname, 'rb').read())

        events = []
        for event in cal.walk('vevent'):
            new_event = self.process_event(event)
            if new_event:
                events.append(new_event)
        events = flatten(events)
        events = sorted(events, cmp=self.date_compare)

        count = 0
        if self.output_format == 'tsv' and self.display_header:
            print "current\tdtstart\tdtend\tduration\tsummary\tlocation\t" \
                "status\tpriority\torganizer\tall day\trecurring\tcreated\t" \
                "attendees"
        for x in events:
            count += 1
            if args.num_records != 0 and count > args.num_records:
                break
            self.output_record(x)