Example #1
def clipMatrix(matrix, minPerRow=1, minPerCol=1):
    """
    Zero out elements of a matrix so that only the highest values in
    each row/column are kept.
    Guarantees at least minPerRow (or minPerCol) survivors - but in the case
    of ties it may keep more (it uses rank.rank in order to handle ties correctly).
    Also, a row may keep many elements if this is required to keep the
    columns happy (and vice versa).
    :param matrix: 2-D numpy array
    :param minPerRow: minimum number of entries to keep per row
    :param minPerCol: minimum number of entries to keep per column
    :return: a copy of the matrix with all other entries zeroed
    """
    m = N.zeros(matrix.shape)
    if minPerRow:
        for j, r in enumerate(matrix):  # each row
            a = rank.rank(r, ties="max")
            lastIdx = len(r) - minPerRow
            for i in range(len(r)):
                if a[i] >= lastIdx:
                    m[j, i] = 1
    if minPerCol:
        for j, c in enumerate(matrix.transpose()):  # each column
            a = rank.rank(c, ties="max")
            lastIdx = len(c) - minPerCol
            for i in range(len(c)):
                if a[i] >= lastIdx:
                    m[i, j] = 1
    m = m * matrix
    return m
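A quick usage sketch (values are illustrative; assumes numpy is imported as N and a rank module matching the snippet is available):

import numpy as N

scores = N.array([[0.9, 0.1, 0.4],
                  [0.2, 0.8, 0.3]])
# keeps at least the top entry of every row and every column, zeroes the rest
print(clipMatrix(scores, minPerRow=1, minPerCol=1))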
Example #2
def DFS_bound2(start, goal, depth, cycle_detection=True, verbose=False):
    bases, _, patterns, columns = rank.get_information(start) #parameters for unrank
    frontier = []
    start_int = rank.rank(start) #convert start matrix to its rank integer
    frontier.append([start_int])

    while frontier:
        path = frontier.pop()
        last_vertex = path[-1]

        # convert to matrix for goal and neighbors
        matrix_last = unrank.unrank(last_vertex, bases, patterns, columns)
        if is_goal(matrix_last, goal):
            if verbose: #if asked by user, print path
                print(path)
            print("length:", len(path))
            return path
        if len(path) == depth:
            continue
        for next_vertex in neighbors(matrix_last):
            int_next = rank.rank(next_vertex) #convert neighbor matrix to integer
            if cycle_detection:
                if int_next in path:
                    continue
            new_path = path + [int_next]
            frontier.append(new_path)
    return None
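The rank.rank / unrank.unrank helpers these search snippets depend on are not shown. As a rough sketch of the underlying idea (an assumption, ignoring the patterns/columns bookkeeping the real unrank takes), a state whose positions have known bases can be encoded as a mixed-radix integer; this is also why multiplying the bases gives the size of the visited arrays in the pruned variants below:

def mixed_radix_rank(digits, bases):
    # encode a digit sequence as one integer, least-significant digit first
    n, place = 0, 1
    for d, b in zip(digits, bases):
        n += d * place
        place *= b
    return n

def mixed_radix_unrank(n, bases):
    # invert mixed_radix_rank digit by digit
    digits = []
    for b in bases:
        digits.append(n % b)
        n //= b
    return digits

# mixed_radix_rank([1, 0, 2], [2, 3, 4]) == 13
# mixed_radix_unrank(13, [2, 3, 4]) == [1, 0, 2]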
Example #3
def DFS_prune2(start, goal, verbose=False):
    bases, _, patterns, columns = rank.get_information(start) #parameters for unrank
    frontier = []
    start_int = rank.rank(start) #convert start matrix to its rank integer
    frontier.append([start_int])

    #figure out size of visited array (by multiplying bases) and initialize it
    prod = 1
    for i in bases:
        prod *= i
    visited = [False for u in range(prod)]
    visited[start_int] = True

    while frontier:
        path = frontier.pop()
        last_vertex = path[-1]

        # convert to matrix for goal and neighbors
        matrix_last = unrank.unrank(last_vertex, bases, patterns, columns)
        if is_goal(matrix_last, goal):
            if verbose: #if asked by user, print path
                print(path)
            print("length:", len(path))
            return path
        for next_vertex in neighbors(matrix_last):
            int_next = rank.rank(next_vertex) #convert neighbor matrix to integer
            if visited[int_next]:
                continue
            new_path = path + [int_next]
            visited[int_next] = True
            frontier.append(new_path)
    return None
Example #4
def BFS3(start, goal, cycle_detect=False, verbose=False, ranking=False):
    frontier = []

    if ranking:
        bases, _, patterns, columns = rank.get_information(start)
        start_int = rank.rank(start)
        frontier.append([start_int])

        #figure out size of visited array (by multiplying bases) and initialize it
        prod = 1
        for i in bases:
            prod *= i
        visited = [False for u in range(prod)]
        visited[start_int] = True
    else:
        frontier.append([start])

    while frontier:
        path = frontier.pop(0) #select and remove first path from frontier
        last_vertex = path[-1]

        if ranking:
            # convert from integer to matrix to check if goal and/or find neighbors
            last_vertex = unrank.unrank(last_vertex, bases, patterns, columns)

        if is_goal(last_vertex, goal):  #check if last vertex in path is goal
            if verbose: #if asked by user, print each vertex(matrix) in path
                if ranking:
                    print(path)
                else:
                    for matrices in path:
                        for line in matrices:
                            print(line)
                        print()
            print(len(path))    #print length of solution for convenience
            return path


        #enter procedure here, if user asked to use ranking/unranking
        if ranking:
            for next_vertex in neighbors(last_vertex):
                int_next = rank.rank(next_vertex) #convert neighbor matrix to integer
                if cycle_detect:
                    if visited[int_next]:
                        continue
                new_path = path + [int_next]
                visited[int_next] = True
                frontier.append(new_path)

        #enter procedure here, if user did not want to use ranking/unranking
        else:
            for next_vertex in neighbors(last_vertex):
                if cycle_detect:    #include cycle detection if asked for
                    if next_vertex in path:
                        continue
                new_path = path + [next_vertex]
                frontier.append(new_path)

    return None
Example #5
def graph_draw(graph):
	rank.rank(graph)
	print "done rank"
	tmp_graph = ordering.ordering(graph)
	print "done ordering"
	position.position(tmp_graph)
	print "done position"

	return tmp_graph
Example #6
def crossvalidate(input, fold, outputdir, distance, ntrees, reuse, trainingdir,
                  randomforestdir, predictiondir):
    cases = []

    with open(input) as inputfile:
        reader = csv.reader(inputfile, delimiter=',', quotechar='"')
        for row in reader:
            cases.append((row[0], row[1:]))

    iteration = 1
    kfold = KFold(len(cases), n_folds=fold)
    for train_indices, test_indices in kfold:
        generateTrainingInput = {}

        for train_index in train_indices:
            generateTrainingInput[cases[train_index][0]] = \
                                  cases[train_index][1]

        trainingout = None
        if trainingdir:
            trainingout = os.path.join(trainingdir, 'cv-fold%d-training.csv' %
                                                    iteration)

        if reuse and trainingout and os.path.isfile(trainingout):
            print("Reusing training set CSV file %s..." % trainingout)
            trainingset = pd.read_csv(trainingout, quoting=csv.QUOTE_NONNUMERIC)
        else:
            trainingset = generateTrainingSet(generateTrainingInput, distance,
                                              output=trainingout)
        rfoutput = None
        if randomforestdir:
            rfoutput = os.path.join(randomforestdir, 'cv-fold%d-rf.joblib' %
                                          iteration)

        if reuse and rfoutput and os.path.isfile(rfoutput):
            print("Reusing RandomForest file %s..." % rfoutput)
            rf = joblib.load(rfoutput)
        else:
            rf = train(trainingset, ntrees, rfoutput)

        for test_index in test_indices:
            predictionList = predict(rf, cases[test_index][1], saveOutput=predictiondir,
                    outputdir=predictiondir, templateFile=cases[test_index][0])

            for j, model in enumerate(predictionList):
                model.label = os.path.basename(cases[test_index][1][j])

            rank(predictionList, os.path.join(outputdir, '%s-ranking.csv' %
                os.path.splitext(os.path.basename(cases[test_index][0]))[0]))

        iteration += 1
Example #7
def dominanceMain(population, functionObject):
    # Assign the new population individuals to the function object
    functionObject.population = population

    # Evaluate the objective functions for the new population and build the funScore matrix
    funScore = np.vstack(
        (functionObject.objFun_1(), functionObject.objFun_2()))
    funScore = np.transpose(funScore)

    N = population.shape[0]
    nN = N // 2

    # From the objective-value matrix, derive the layering and crowding-distance dictionaries
    r_dict = dominance(funScore)
    layerDict = rank(r_dict)

    s = 0
    indicate = []
    for i in range(1, len(layerDict) + 1):
        s += len(layerDict[i])
        if s < nN:
            indicate.extend(layerDict[i])
            continue
        elif s == nN:
            indicate.extend(layerDict[i])
            break
        else:
            s -= len(layerDict[i])
            temp = crowddist(funScore, layerDict[i])
            indicate.extend(temp[:nN - s])
            break

    # Return the new population
    return population[indicate]
Example #8
def create_nw(data, replace_nans):
    nw = np.corrcoef(data)
    np.fill_diagonal(nw, 1)
    nw = rank(nw)
    if replace_nans:
        nw[np.isnan(nw)] = bottleneck.nanmean(nw)
    return nw
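The bare rank used by this and the other co-expression snippets is not defined in these excerpts. A plausible stand-in (an assumption, not the original helper) ranks every entry of the matrix at once while letting NaNs pass through, which is what lets the replace_nans step above find them afterwards:

import numpy as np
from scipy.stats import rankdata

def rank(nw):
    # rank all entries of the matrix at once, keep NaNs as NaN,
    # and scale the ranks into (0, 1]
    flat = nw.flatten()
    ok = ~np.isnan(flat)
    ranks = np.full(flat.shape, np.nan)
    ranks[ok] = rankdata(flat[ok], method='average')
    return (ranks / np.nanmax(ranks)).reshape(nw.shape)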
Example #9
def nw_aggregation(nw_paths, genes, file_key='nw'):
    """Function for aggregating co-expression networks
    
    Takes a list of paths to HDF5 files and reads in networks,
    avearges them and then re-ranks.

    Each HDF5 needs to be in the Pytable in the fixed format with
    the network stored under the key listed in the keyword argument file_key
    
    Arguments:
        nw_paths {list} -- list of strings or paths to HDF5 files
        genes {np.array} -- numpy array of genes for network
    
    Keyword Arguments:
        file_key {str} --  key in HDF5 network is stored under (default: {'nw'})
    
    Returns:
        pd.DataFrame -- Aggregate Network
    """

    agg_nw = np.zeros([genes.shape[0], genes.shape[0]])
    for nw_path in nw_paths:
        nw = pd.read_hdf(nw_path, file_key)
        fill = bottleneck.nanmean(nw.values, axis=None)
        agg_nw += nw.loc[genes, genes].fillna(fill).values
        del nw
        gc.collect()

    return pd.DataFrame(rank(agg_nw), index=genes, columns=genes)
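A hypothetical call (file names and gene list are purely illustrative):

import numpy as np

genes = np.array(['Gapdh', 'Actb', 'Sox2'])           # illustrative gene list
nw_paths = ['dataset1_nw.hdf5', 'dataset2_nw.hdf5']   # illustrative HDF5 files
agg = nw_aggregation(nw_paths, genes)                 # genes x genes pd.DataFrame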
Example #10
def create_nw(data, replace_nans=True):
    array = data.values.T
    nw = np.corrcoef(array)
    np.fill_diagonal(nw, 1)  # fix: fill the diagonal of the network, not the input array
    nw = rank(nw)
    if replace_nans:
        nw[np.isnan(nw)] = bottleneck.nanmean(nw)
    return nw
Example #11
def LCFS2(start, goal, pruning=True, verbose=False):
    bases, _, patterns, columns = rank.get_information(
        start)  #parameters for unrank
    start_int = rank.rank(start)  #convert start matrix to its rank integer

    #frontier will look like [(cost, [path]), (cost, [path]), (cost, [path]),...]
    frontier = []  #this frontier will be used as a heap
    heapq.heappush(
        frontier,
        (0, [start_int]))  #heap prioritized by first element of tuple

    # figure out size of visited array (by multiplying bases) and initialize it
    prod = 1
    for i in bases:
        prod *= i
    visited = [False for u in range(prod)]
    visited[start_int] = True

    while frontier:
        #path_tuple = (cost, [path])
        path_tuple = heapq.heappop(
            frontier)  #select and remove first path tuple from frontier

        #last_vertex = path[-1] from path_tuple
        last_vertex = path_tuple[1][-1]

        # convert from integer to matrix to check if goal and/or find neighbors
        matrix_last = unrank.unrank(last_vertex, bases, patterns, columns)
        if is_goal(matrix_last, goal):  #check if last vertex in path is goal
            if verbose:  #if asked by user, print each vertex(matrix) in path
                print(path_tuple[1])
            print("cost\t:",
                  path_tuple[0])  #print cost of soln for convenience
            print("length\t:", len(
                path_tuple[1]))  #print length of solution for convenience
            return path_tuple
        for next_tuple in neighbors(matrix_last, with_cost=True):
            int_next = rank.rank(next_tuple[1])
            if pruning:  #include cycle detection if asked for
                if visited[int_next]:
                    continue
            new_path = (path_tuple[0] + next_tuple[0],
                        path_tuple[1] + [int_next])
            visited[int_next] = True
            heapq.heappush(frontier, new_path)
    return None
Example #12
def create_bootstrap_nw(datasets_selected):
    agg_nw = np.zeros([genes.shape[0], genes.shape[0]])
    for dataset in datasets_selected:
        logging.info(dataset)
        nw = pd.read_hdf(
            f'/home/bharris/biccn_paper/data/bulk_rna/networks/{dataset}_pearson_nw.hdf5',
            'nw')
        agg_nw += nw.values
        del nw
        gc.collect()
    return pd.DataFrame(rank(agg_nw), index=genes, columns=genes)
Example #13
    def __init__(self, p_suit, p_rank):
        r = rank()
        valid_rank = r.validate(p_rank)

        s = suit()
        valid_suit = s.validate(p_suit)

        if (valid_rank and valid_suit):
            self.suit = p_suit
            self.rank = p_rank
        else:
            raise ValueError("Invalid Rank or Suit")
Example #14
    def __init__(self, new_deck=False, cards=None):
        if (cards is None):
            cards = []

        self.cards = cards
        if (new_deck):
            ranks = rank()
            suits = suit()
            for s in suits.values:
                for r in ranks.values:
                    c = card(s, r)
                    self.cards.append(c)
Example #15
def find_keyword(word_list,lentext=3):
    '''
    Keyword finder.
    Method: remove stopwords, then count the words that remain and return the counts.

    find_keyword(word_list, lentext=3)
    word_list is the list of words
    lentext is the minimum number of occurrences required for a word to count as a keyword; the default is 3
    '''
    filtered_words = [word for word in word_list if word not in set(stopwords.words('thai'))]
    word_list=rank(filtered_words)
    return {k:v for k, v in word_list.items() if v>=lentext}
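For example (assuming rank counts word frequencies, Counter-style, as in pythainlp):

words = ['แมว', 'กิน', 'ปลา', 'แมว', 'ชอบ', 'ปลา', 'แมว']
print(find_keyword(words, lentext=2))
# -> something like {'แมว': 3, 'ปลา': 2}: words left after stopword
#    removal that occur at least lentext times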
Example #16
    def run_ranking(self):
        # both files are closed by the with block; no explicit close() needed
        with open(self.queries, "r") as queries, \
             open(self.op_file, "w+") as output:
            query = queries.readline().rstrip()
            while query:
                rank_sys = rank.rank(self.indexer)
                list_top_k = rank_sys.get_top_k(self.k, query)
                self.record(rank_sys, query, list_top_k, output)
                print("Done with query: ", query)
                query = queries.readline().rstrip()
                output.write("\n")
                output.write("\n")
Example #17
def main(start, end, apiKey, mode):

    path, places, dist = route(start, end, apiKey, mode)

    finalDestinations = rank(calcDistance(places, path), dist)

    happyTrail = []
    nameAddress = []
    for poi in finalDestinations:
        if poi is not None:
            happyTrail.append(poi[0])
            nameAddress.append((poi[1]['name'], poi[1]['formatted_address']))

    return happyTrail, nameAddress
Example #18
def estimate(population, functionObject):
    # Assign the new population individuals to the function object
    functionObject.population = population

    # Evaluate the objective functions for the new population and build the funScore matrix
    funScore = np.vstack(
        (functionObject.objFun_1(), functionObject.objFun_2()))
    funScore = np.transpose(funScore)

    # From the objective-value matrix, derive the layering and crowding-distance dictionaries
    r_dict = dominance(funScore)
    layerDict = rank(r_dict)

    print(funScore[layerDict[1]])
Example #19
    def __init__(self, parameter_suit, parameter_rank):
        r = rank()
        valid_rank = r.validate(parameter_rank)

        s = suit()
        valid_suit = s.validate(parameter_suit)

        if (valid_rank and valid_suit):
            self.suit = parameter_suit
            self.rank = parameter_rank
        else:
            raise ValueError("Invalid Rank or Suit")
Example #20
def rankInRows(matrix, reverse=True, frank=False):
    """
    perform a rank transform on each ROW of the matrix
    so each row ranks its columns

    :param matrix: 2-D numpy array
    :param reverse: if True (the default), invert ranks so the largest value in a row gets rank 0
    :param frank: floating point ranking tolerance for ties
    :return: an integer matrix
    """
    rmat = N.zeros(matrix.shape, dtype='int32')
    for i in range(matrix.shape[0]):
        r = matrix[i]
        n = rank.rank(r, ties="max", frank=frank)
        rmat[i] = N.max(n) - n if reverse else n
    return rmat
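For instance (a sketch, assuming rank.rank returns ascending integer ranks so the largest entry gets the highest rank):

import numpy as N

m = N.array([[0.2, 0.9, 0.5],
             [7.0, 1.0, 3.0]])
print(rankInRows(m))
# with reverse=True the best column in each row gets 0, e.g.
# [[2 0 1]
#  [0 2 1]]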
Example #21
    def OnClickRun(self, evt):
        self.beginTime = int(time.mktime(datetime.datetime.now().timetuple()))
        runtime = 0
        # If both features "boost index only" and "fetch ranking" are enabled, report an error
        runType = self.runTypeBtn.GetValue()
        getRank = self.getRankBtn.GetValue()
        if runType and getRank:
            self.errInfo(u'In feature selection, only one of "boost index only" and "fetch ranking" may be chosen.')
            return
        func = (1 if runType else 2 if getRank else 0)
        # If targetURLkw was not filled in, report an error
        self.urlkw = self.target_kw.GetValue().strip()
        if (not self.urlkw and not self.runType):
            self.errInfo(u"Please fill in the target page title keywords, separated by ASCII commas!")
            return
        else:
            self.urlkw = self.urlkw.split(",")
        # If no keyworks file was selected, report an error
        if not self.keyworks:
            self.errInfo(u"Please select a keyword configuration file!")
            return
        self.proxyConfig = self.proxyText.GetValue().strip()
        # If a fixed run count was selected but left empty, report an error
        if self.runTime.GetValue():
            runtime = self.runText.GetValue().strip()
            if (not runtime) or (not runtime.isdigit()) or (not int(runtime)):
                self.errInfo(u"The run-count setting is invalid!")
                return
        if func == 2:  # If the fetch-ranking feature is selected, fix the run count at 1
            runtime = 1
        # If the proxy configuration is empty, report an error
        if self.proxyConfig == "" or self.proxyConfig == u"Click the button on the right to choose a file...":
            self.errInfo(u"Proxy settings cannot be empty!")
            return
        self.multiText.SetValue("")
        self.buttonRun.SetLabel(u"Running")
        self.buttonStop.SetLabel(u"Stop")
        evt.GetEventObject().Disable()
        self.DisableOnRun()
        self.OnStart()
        searcher = self.EvtRadioBox_SPF(evt)
        driverType = self.EvtRadioBox_PF(evt)
        isPhantomjs = self.getIsPhantomjs(evt)
        self.rankObj = rank(searcher, driverType, isPhantomjs, self.proxyType,
                            self.proxyConfig, self.keyworks, self.urlkw, func,
                            int(runtime))
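Example #22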
def dominanceMain(population, func_object):
    # Assign the new population individuals to the function object
    func_object.population = population

    # Evaluate the objective functions for the new population and build the funScore matrix
    func_score = vstack((func_object.objFun_1(), func_object.objFun_2()))

    set_printoptions(suppress=True)  # no scientific notation

    func_score = transpose(func_score)  # transpose

    N = population.shape[0]  # number of rows of the matrix
    nN = N // 2  # changed to floor division
    #nN = N
    # From the objective-value matrix, derive the layering and crowding-distance dictionaries
    r_dict = dominance(func_score)  # relation dict {individual: [number of dominators, list of dominated individuals]}
    # print(r_dict)
    layer_dict = rank(r_dict)  # layering dict layerDict {1:[3,1,4], 2:[2,0]}

    s = 0
    indicate = []

    for i in range(1, len(layer_dict) + 1):
        s += len(layer_dict[i])
        if s < nN:
            indicate.extend(layer_dict[i])
            continue
        elif s == nN:
            indicate.extend(layer_dict[i])
            break
        else:
            s -= len(layer_dict[i])
            temp = crowddist(func_score, layer_dict[i])
            indicate.extend(temp[:nN - s])
            break

    for i in range(len(layer_dict)):  # i starts at zero, so use i + 1 below
        #print(layerDict[i+1])
        ceng_population = array(population[layer_dict[i + 1]])
        N = ceng_population.shape[0]
        n = len(array(list(set([tuple(t) for t in ceng_population]))))
        print("Layer ", i, " duplication rate:", n / N, "--individuals in layer:", N, "--unique individuals:", n)

    print(len(indicate))
    # Return the new population
    return population[indicate]
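Example #23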
def estimate(population, functionObject):
    # Assign the new population individuals to the function object
    functionObject.population = population

    # Evaluate the objective functions for the new population and build the funScore matrix
    funScore = vstack((functionObject.objFun_1(), functionObject.objFun_2()))
    set_printoptions(suppress=True)
    funScore = transpose(funScore)  # for a 2-D array this is a transpose
    # print("funScore",funScore)

    # From the objective-value matrix, derive the layering and crowding-distance dictionaries
    r_dict = dominance(funScore)
    layerDict = rank(r_dict)

    #ls = np.append(layerDict[1],layerDict[2])
    # print(ls)

    print(funScore[layerDict[1]])
Example #24
    def submit_selected_urls(self, positive, negative):
        """Perform ranking and diversifying on all urls with regard to the positive urls.

        Args:
            labeled_urls: a list of pairs <url, label>. Label 1 means positive and 0 means negative.
        Returns:
            urls: list of urls with ranking scores
        """
        # Test new positive and negative examples with existing classifier
        # If accuracy above threshold classify pages
        # Ranking
        # Diversification

        documents = {}
        other = []
        
        all_docs = get_bag_of_words(list(self.urls_set))

        for url in positive:
            if url in all_docs:
                self.positive_urls_set.add(url)
                self.negative_urls_set.discard(url)

        for url in negative:
            if url in all_docs:
                self.negative_urls_set.add(url)
                self.positive_urls_set.discard(url)

        for url in all_docs.keys():
            content = all_docs[url]
            if (len(self.negative_urls_set) == 0) or (url not in self.negative_urls_set):
                documents[url] = content
                if url not in self.positive_urls_set:
                    other.append(url)

        self.tfidf = tfidf.tfidf(documents)

        chdir(self.memex_home + '/seed_crawler/ranking')
        ranker = rank.rank()
        
        [ranked_urls,scores] = ranker.results(self.tfidf,self.positive_urls_set, other)
        return [ranked_urls, scores] # classified, ranked, diversified 
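Example #25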
def create_nw(data, replace_nans):
    """Compute Co-expression network from the data
    
    Core network building function. We always run with replace_nans = True
    Slicing single cell data will reguarly produce genes with no counts.
    And any correlation with a vector of all 0s is Nan.
    
    Arguments:
        data {np.array} -- Array of float values in shape of genes x cells
        replace_nans {bool} -- Flag for whether to replace Nans in network
    
    Returns:
        np.array -- ranked co-expression matrix of genes x genes 
    """
    nw = np.corrcoef(data)
    np.fill_diagonal(nw, 1)
    nw = rank(nw)
    if replace_nans:
        nw[np.isnan(nw)] = bottleneck.nanmean(nw)
    return nw
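A minimal usage sketch on random data (shapes are illustrative):

import numpy as np

data = np.random.rand(50, 200)           # 50 genes x 200 cells
nw = create_nw(data, replace_nans=True)  # 50 x 50 ranked co-expression matrix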
Example #26
def vs_query(argv):
    #get index directory
    path = argv[0]
    N = len(os.listdir(path)) - 1
    numDocs = int(argv[1])
    termString = ' '.join(argv[3:])
    #tokenize terms before scoring
    temp = termString.split(' "')
    phrases = []
    regular = []
    for i in temp:
        if '"' in i:
            phrases.append(i)
        else:
            regular += i.split(' ')
    ALL = []

    for i in phrases:
        ALL += [' '.join(tknize(i))]
    for i in regular:
        ALL += tknize(i)

    #calling the doc grabber

    grabber = gd.DocGrabber(ALL, path)
    grabbed, grabbed4Jacy = grabber.grab_relavent()
    #getting the weighted query
    score = scoreQuery(ALL, N, path, grabbed4Jacy)
    #ranking the docs
    stuff = rank.rank(score, grabbed, numDocs)
    for thing in stuff:
        doc, score = thing
        if sys.argv[3].lower() == "y":
            print(doc + "\t" + str(score))
        else:
            print(doc)
Example #27
del andata2
gc.collect()

expression = andata.to_df()
agg_nw = np.zeros([genes.shape[0], genes.shape[0]])
metacell_values = metacell_assignment['x'].unique()
logging.info(np.max(metacell_values))
for metacell in metacell_values:
    logging.info(metacell)

    #Generate Mask for slicing
    mask = metacell_assignment['x'] == metacell

    if mask.sum() < 20:
        logging.info(f'{metacell} too small')
        del mask
        gc.collect()
        continue

    #Expression is DataFrame of cells x genes
    data = expression[mask].values.T
    nw = create_nw(data, True)
    agg_nw += nw
    del nw, mask, data
    gc.collect()

dataset_nw = pd.DataFrame(rank(agg_nw), index=genes, columns=genes)
fn = f'/home/bharris/biccn_paper/data/networks/metacells/metacell_agg_nw_{dataset}_{args.outname}.hdf5'
logging.info(f'writing to : {fn}')
dataset_nw.to_hdf(fn, 'nw')
Example #28
def evaluate(args, outdir, split):

    # Load Model
    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)
    output_model_file = os.path.join(outdir, WEIGHTS_NAME)
    output_config_file = os.path.join(outdir, CONFIG_NAME)
    config = BertConfig(output_config_file)
    model = BertScoring(config)
    model.load_state_dict(torch.load(output_model_file))
    model.to(args.device)
    n_gpu = torch.cuda.device_count()
    logger.info("device: {} n_gpu: {}".format(args.device, n_gpu))
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Data
    evalDataObject = RetrievalDataset(args,
                                      split=split,
                                      tokenizer=tokenizer,
                                      istrain=False)
    eval_dataloader = torch.utils.data.DataLoader(evalDataObject,
                                                  batch_size=args.batch_size,
                                                  shuffle=False,
                                                  pin_memory=True,
                                                  num_workers=args.workers)

    # Run prediction
    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", eval_dataloader.__len__())
    logger.info("  Batch size = %d", args.eval_batch_size)
    model.eval()
    scores1 = np.zeros((evalDataObject.num_samples, 1))
    scores2 = np.zeros((evalDataObject.num_samples, 1))
    idxq = np.zeros((evalDataObject.num_samples, 1))
    labels = np.zeros((evalDataObject.num_samples, 1))
    firstidx = 0
    for _, batch in enumerate(tqdm(eval_dataloader, desc="Iteration")):
        input_ids, input_mask, segment_ids, id_q, truelabel = batch
        input_ids = input_ids.to(args.device)
        input_mask = input_mask.to(args.device)
        segment_ids = segment_ids.to(args.device)

        with torch.no_grad():
            scores = model(input_ids, segment_ids, input_mask)

        scores = scores.detach().cpu().numpy()
        numSamplesBatch = truelabel.numpy().shape[0]
        scores1[firstidx:firstidx + numSamplesBatch, 0] = scores[:, 0]
        scores2[firstidx:firstidx + numSamplesBatch, 0] = scores[:, 1]
        idxq[firstidx:firstidx + numSamplesBatch, 0] = id_q.numpy()
        labels[firstidx:firstidx + numSamplesBatch, 0] = truelabel.numpy()
        firstidx = firstidx + numSamplesBatch

    utils.save_obj(
        scores1, os.path.join(args.data_dir,
                              'retrieval_scores_%s.pckl' % split))
    utils.save_obj(
        idxq, os.path.join(args.data_dir, 'retrieval_idxq_%s.pckl' % split))
    utils.save_obj(
        labels, os.path.join(args.data_dir,
                             'retrieval_labels_%s.pckl' % split))
    medR1, recall1, medR2, recall2 = rank(scores1, scores2, idxq, labels)
    logger.info('Accuracy medR {medR:.2f}\t Recall {recall}'.format(
        medR=medR1, recall=recall1))
Example #29
import numpy as np
from get_params import get_params
from build_database import build_database
from get_features import get_features
from rank import rank
from eval_rankings import eval_rankings
import warnings
warnings.filterwarnings("ignore")

# Extract the parameters
params=get_params()
# Create the database
params['split']='train'
build_database(params)
params['split']='val'
build_database(params)
# Extract the features
get_features(params)
# Compute the ranking
rank(params)
# Evaluate the ranking
ap_list=eval_rankings(params)
print("-List of Average Precision: ")
print(ap_list)
print("\n")
print("-Mean Average Precision: ")
print(np.mean(ap_list))

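Example #30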
ruta1=os.path.dirname(os.path.abspath(__file__))+'\\TerrassaBuildings900\\val\\images'
ruta2=os.path.dirname(os.path.abspath(__file__))+'\\TerrassaBuildings900\\train\\images'
savepath1=os.path.dirname(os.path.abspath(__file__))+'\\TerrassaBuildings900\\val'
savepath2=os.path.dirname(os.path.abspath(__file__))+'\\TerrassaBuildings900\\train'

build_database(ruta1,savepath1)
build_database(ruta2,savepath2)

get_features(ruta1,savepath1,savepath1)
get_features(ruta2,savepath2,savepath2)

savepath_principal=os.path.dirname(os.path.abspath(__file__))
features_val=os.path.dirname(os.path.abspath(__file__))+'\\TerrassaBuildings900\\val'
features_train=os.path.dirname(os.path.abspath(__file__))+'\\TerrassaBuildings900\\train'
rank(features_val,features_train,savepath_principal)

feat=os.path.dirname(os.path.abspath(__file__))+'\\TerrassaBuildings900\\val\\Features.txt'
path_out=os.path.dirname(os.path.abspath(__file__))
labels=os.path.dirname(os.path.abspath(__file__))+'\\labels.txt'
classify(feat,path_out,labels)

path=os.path.dirname(os.path.abspath(__file__))
Gt_val_test=os.path.dirname(os.path.abspath(__file__))+'\\TerrassaBuildings900\\val\\annotation.txt'
evaluate_ranking(path,Gt_val_test)

automatic_annot=os.path.dirname(os.path.abspath(__file__))+'\\classify.txt'
annotation=os.path.dirname(os.path.abspath(__file__))+'\\TerrassaBuildings900\\val\\annotation.txt'
evaluate_classification(automatic_annot,annotation)

Example #31
import numpy as np
from get_params import get_params
from build_database import build_database
from get_features import get_features
from rank import rank
from eval_rankings import eval_rankings
import warnings
warnings.filterwarnings("ignore")

# Extract the parameters
params = get_params()
# Create the database
params['split'] = 'train'
build_database(params)
params['split'] = 'val'
build_database(params)
# Extract the features
get_features(params)
# Compute the ranking
rank(params)
# Evaluate the ranking
ap_list = eval_rankings(params)
print("-List of Average Precision: ")
print(ap_list)
print("\n")
print("-Mean Average Precision: ")
print(np.mean(ap_list))
Example #32

def test_rank(shape=(8, 4), alpha=0.5):
    """
    Test rank finding routine
    """
    m, n = shape

    A = generate_A((m, n))
    #A = generate_A((m, n), 'random')
    #err = generate_A((1, m), 'random')

    #A[:, n-1] = A[:, 0] + 0.1 * A[:, 1]
    #A[:, n-1] = A[:, 0] + 0.000001 * err
    print("A = \n", A)

    therank, R = rank.rank(A)

    print("R = \n", R)
    print("rank = ", therank)


if __name__ == '__main__':
    arg = sys.argv[1:]

    if (len(arg) > 0) and (arg[0] == 'ls'):
        print("doing leastsquares test")
        test_ls()
    elif (len(arg) > 0) and (arg[0] == 'rank'):
        print("doing rank test")
        test_rank()
Example #33
diffused_keys = {}

def diffuse(k):
    k *= M
    return k & ((1 << bits) - 1)

for k in keys:
    d = diffuse(k)
    assert d not in diffused_keys
    diffused_keys[diffuse(k)] = k

for k, v in diffused_keys.items():
    r = k // side
    assert square[k % side][r] == NOT_A_VALUE
    square[k % side][r] = rank(v)

length = 0

for i in range(0, len(offset)):
    for j in range(0, len(ranks)):
        collision = False
        for k in range(0, side):
            s = square[k][i]
            h = ranks[j+k]
            collision = (s != NOT_A_VALUE and h != NOT_A_VALUE and s != h)
            if collision: break
        if not collision:
            offset[i] = j
            for k in range(0, side):
                s = square[k][i]
Example #34
def get_texts(json_object):
    """
    Parsing logic for getting texts
    """
    texts = list()
    texts.append(json_object.get(FIELD_NAMES_CITIES['text_field']))
    return texts

def get_annotated_list(json_object):
    """
    Parsing logic for getting annotated field
    """
    return json_object.get(FIELD_NAMES_CITIES['annotated_field'])

embeddings_dict = read_embedding_file(EMBEDDINGS_FILE)

classifier = train_ranker.train_ranker(embeddings_dict, TRAINING_FILE_CITIES, FIELD_NAMES_CITIES)

with codecs.open(ACTUAL_FILE_CITIES, 'r', 'utf-8') as f:
    for line in f:
        obj = json.loads(line)
        list_of_texts = get_texts(obj)
        annotated_list = get_annotated_list(obj)
        print "Annotated tokens:",
        print annotated_list
        ranked_list = rank.rank(embeddings_dict, list_of_texts, annotated_list, classifier)
        print "Ranked List:",
        print ranked_list

Example #35
# rather than a list. In this case, convert it to a list with one element
searchresults = processresults.processResults(resultsdict, searchresults)
resultcount += nr_of_results

#-----------------------------------
# PART 4: Rank results
#-----------------------------------

# determine relevance	
searchresults = rank.calculateRelevance(searchresults, recommendation, majorterms)

# calculate scientific strength
searchresults = rank.calculatestrength(searchresults)
	
# create sorted list of article evidence
outstring, c, goals_found, top25 = rank.rank(searchresults, evidence, goals)

out.write(outstring)
print("Total number of search results: " + str(resultcount))
print("Percentage of goal articles found:")
print(str((goals_found / len(goals)) * 100) + "%")

print("Percentage of found goal articles in top 25: ")
if goals_found == 0:
	print("NA")
else:
	print(str((top25 / goals_found * 100)) + "%")

out.write("</body></html>")

out.close()
Example #36
    if len(ps) > 0:
        cur_P = ps[0]
        for j in range(1, len(ps)):
            for n in ps[j]:
                cro = [1 for m in cur_P if n < m]
                crossings += sum(cro)
    return crossings


if __name__ == "__main__":
    import graph
    import rank


    length = 8
    edges = [[0, 1], [0, 2], [1, 4], [1, 5], [2, 3], [4, 6], [3, 7], [5, 6]]

    g = graph.Graph(edges, length)

    rank.rank(g)

    order = ordering(g)

    print(crossing(order))

    for o in order:
        s = "Rank: "
        for v in o:
            s += " " + str(v.index) + " "
        print(s)
Example #37
    #print(distVector,indicate)
    a = argsort(distVector)  # a holds the element order sorted by crowding distance
    dist_indicate = indicate[a].tolist()
    #print(dist_indicate)

    return dist_indicate[::-1]


if __name__ == "__main__":
    funScore = array([[1, 2], [2, 3], [2, 2], [2, 2], [2, 2], [2, 2], [2, 2],
                      [2, 2], [2, 2], [2, 2], [3, 2], [4, 3], [2, 1], [3, 1],
                      [3, 2], [3, 3], [3, 4]])
    layerDict = {1: [4, 9], 2: [8], 3: [1, 3, 7], 4: [2, 6], 5: [0, 5]}
    d = dominance(funScore)
    print(d)
    r = rank(d)
    print(r)
    #print(crowddist(funScore, layerDict[3]))
    print(crowddist(funScore, r[2]))
'''
    p = population(100, 4)
    #print(p)
    f = fitness_pop(p)
    #print(f)
    d = dominance(f)
    print("支配关系字典d:",d)
    r = rank(d)
    print("分层字典r:",r)
    c = crowddist(f,r[5])
    print(c)
'''