import time


def calc_similarity(kpi,
                    similarity_method='ecc',
                    weights=(0.5, 0.5),
                    default_dict=None,
                    default_value=0):
    # Compute pairwise similarity; the combined result is
    # Similarity(value) * weights[0] + Similarity(score) * weights[1].
    similarity_dict = default_dict if isinstance(default_dict, dict) else dict()

    for k1, v1 in kpi.items():
        for k2, v2 in kpi.items():
            if k1 not in similarity_dict:
                similarity_dict[k1] = dict()
            if k2 not in similarity_dict:
                similarity_dict[k2] = dict()
            if k1 == k2:
                similarity_dict[k1][k1] = [
                    default_value, default_value, default_value
                ]
            elif k1 in similarity_dict[k2]:
                # The mirrored pair (k2, k1) was already computed; skip it.
                continue
            else:
                T0 = time.time()
                print([k1, k2], end=" ")
                s1 = Similarity(v1.value.values,
                                v2.value.values).use_method(similarity_method)
                s2 = Similarity(v1.score.values,
                                v2.score.values).use_method(similarity_method)
                print("[{1},{2}]:{0}".format(time.time() - T0, s1, s2),
                      flush=True)
                combined = s1 * weights[0] + s2 * weights[1]
                similarity_dict[k1][k2] = [s1, s2, combined]
                similarity_dict[k2][k1] = [s1, s2, combined]
    print('finished calculating similarity; current dict size is {0} ({1})'.
          format(len(similarity_dict), time.asctime()),
          flush=True)
    return similarity_dict
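A minimal usage sketch for calc_similarity, assuming each kpi entry exposes pandas Series attributes `value` and `score` (those attribute names come from the function body; the `KpiRecord` wrapper and the sample numbers are invented for illustration):

import pandas as pd

class KpiRecord:  # hypothetical container matching the attributes used above
    def __init__(self, value, score):
        self.value = pd.Series(value)
        self.score = pd.Series(score)

kpi = {
    'cpu': KpiRecord([0.1, 0.4, 0.3], [1, 0, 1]),
    'mem': KpiRecord([0.2, 0.5, 0.2], [0, 0, 1]),
}
sims = calc_similarity(kpi, similarity_method='ecc', weights=(0.7, 0.3))
print(sims['cpu']['mem'])  # [value_sim, score_sim, weighted combination]
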
Example #2
def print_equalities():
    Constants.SIMILARITY = Similarity()
    Constants.PARAPHRASE = True
    sentences = read_test_data(r"C:\Users\Nikolaj\PycharmProjects\LitteralJapaneseTranslation\data\sentences_dev.txt")
    for sentence in sentences:
        print([t.english for t in sentence.tokens])
        translations = LiteralJapanese.translate(sentence.japanese)
        print([t.english for t in translations])
        for translation in translations:
            for token in sentence.tokens:
                (equal, rule) = Equality.equals_rule(token.english, translation[1])
                if equal:
                    print(token.english + " = " + translation.english + "\t" + "(" + rule + ")")
Example #3
def similarity():
    datatext = DataText()
    datatext.setPTBData()
    cooccurrence = Cooccurrence(datatext, 2)
    cooccurrence.createMatrix()
    cooccurrence.calcPMIfromCoMatrix()
    cooccurrence.reduceDimensions(100)
    similarity = Similarity(datatext, cooccurrence.matrix)

    print("n_sequense: ", len(datatext.words))
    print("n_words: ", datatext.n_words)
    querys = ["you", "year", "car", "toyota"]
    for query in querys:
        print("query: ", query)            
        similarity.rankSimilarity(query)
Example #4
import pygame as pg


def main():
    similarity = Similarity()

    pg.init()

    display_width = 740
    display_height = 480

    clock = pg.time.Clock()
    gameDisplay = pg.display.set_mode([display_width, display_height])
    pg.display.set_caption('Face Dance Machine')

    faceDanceMachine = FaceDanceMachine(gameDisplay, similarity)
    faceDanceMachine.run()
    pg.quit()
Example #5
import pandas as pd


def create_statistic(data_path):
    # Init classes
    sim = Similarity()

    # Load the records into a pandas DataFrame
    data_frame = pd.DataFrame.from_dict(data_path)

    # Filter out the questions the bot could not answer
    nao_respondidas = []
    # Expected fallback answer
    i_dont_know = 'I don\'t know how to respond this'
    for index, row in data_frame.iterrows():
        val = sim.symmetric_sentence_similarity(i_dont_know, row.response)
        if val > 0.88:
            nao_respondidas.append([row.input, 1])

    total_nao_respondidas = len(nao_respondidas)

    # A similarity above 0.55 is treated as the same topic: similar questions
    # are removed from the list and counted on their first occurrence.
    i = 0
    while i < len(nao_respondidas):
        ask = nao_respondidas[i]
        j = i + 1
        while j < len(nao_respondidas):
            val = sim.symmetric_sentence_similarity(ask[0], nao_respondidas[j][0])
            if val > 0.55:
                del nao_respondidas[j]
                ask[1] += 1
            else:
                j += 1
        i += 1

    # Sort by occurrence count and keep the ten most frequent topics
    nao_respondidas.sort(key=lambda item: item[1], reverse=True)
    top_10 = nao_respondidas[:10]

    response = {"total": total_nao_respondidas, "top": top_10}
    return response
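A minimal call sketch for create_statistic, assuming the argument is a column-oriented dict of chat logs with `input` and `response` fields (the sample records are invented):

logs = {
    "input": ["What are your opening hours?", "When do you open?", "Hello"],
    "response": ["I don't know how to respond this",
                 "I don't know how to respond this",
                 "Hi! How can I help?"],
}
stats = create_statistic(logs)
print(stats["total"])  # number of unanswered questions
print(stats["top"])    # up to ten most frequent unanswered topics with counts
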
Example #6
    def __init__(self, urls, onchange=None):
        # onchange is a callback invoked as onchange(url)

        ########################################################################
        # HYPERPARAMETERS
        self.datapath = "./data"
        self.periode = 10 * 60  # check every 10 minutes
        self.deltastart = 3  # start each thread with a 3 sec offset
        self.max_change_ratio = 0.10
        ########################################################################

        super(PagesChecker, self).__init__()
        self.isrunning = False
        self.urls = urls
        self.onchange = onchange
        self.threads = []
        self.sm = Similarity()

        # Create the data directory unless it already exists as a directory
        if not os.path.isdir(self.datapath):
            os.mkdir(self.datapath, 0o755)
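A usage sketch based on the constructor above; the start() call assumes PagesChecker subclasses threading.Thread, which the super().__init__() call suggests but the snippet does not show:

def notify(url):
    print("page changed:", url)

checker = PagesChecker(
    urls=["https://example.com", "https://example.org"],
    onchange=notify,
)
checker.start()  # assumed Thread-style API (see note above)
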
Example #7
import traceback
from flask import Flask, request, jsonify
from Similarity import Similarity

app = Flask(__name__)
sim = Similarity()


@app.route('/find_person', methods=['POST'])
def find_person():
    result = {}
    result['status'] = 1
    try:
        fp = request.files['img']
        person_id, face_sim = sim.find_person(fp)

        data = {}
        data['person_id'] = person_id
        data['face_sim'] = face_sim
        result['data'] = data
    except BaseException as e:
        traceback.print_exc()
        result['err'] = "{}".format(e)
        result['status'] = 0  # error
    return jsonify(result)
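A client-side sketch for the /find_person route, using the requests library; the host, port, and image path are placeholders:

import requests

with open("face.jpg", "rb") as img:
    resp = requests.post("http://localhost:5000/find_person",
                         files={"img": img})
print(resp.json())  # e.g. {"status": 1, "data": {"person_id": ..., "face_sim": ...}}
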


@app.route('/rm_person', methods=['POST'])
def rm_person():
    result = {}
Example #8
    print "Loading Data"
    mentions = [
        convert_json(json.loads(x)) for x in open("Data/Mentions.json")
    ]
    mid_mention = dict([(x['mid'], x) for x in mentions])
    pairs = json.load(
        open("/afs/inf.ed.ac.uk/group/teaching/anlp/asgn3/Train_Test_Pairs"))
    mid_eid = dict([
        apply(lambda x, y: (int(x), y),
              line.strip().split("\t"))
        for line in open("/afs/inf.ed.ac.uk/group/teaching/anlp/asgn3/mid_eid")
    ])
    loaded = True

print "Computing Training Similarities"
s = Similarity()
labels = []
features = []
for (mid0, mid1) in pairs['training']:
    labels.append(int(mid_eid[mid0] == mid_eid[mid1]))
    features.append(s.compute(mid_mention[mid0], mid_mention[mid1]))
#Adding features here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# A little hack so that first class is always 1:
labels.insert(0, 1)
features.insert(0, {})

print "Active Similarities:"
print "\t".join(reduce(lambda x, y: x | y, [set(x.keys()) for x in features]))

print "Training"
# We need to convert string feature names to feature numbers for liblinear:
    def build(self):
        log.info('building rnn cell....')
        if self.cell == 'gru':
            recurrent_x = GRU(self.rng, self.n_input, self.n_hidden, self.x,
                              self.E, self.xmask, self.is_train, self.dropout)

            recurrent_y = GRU(self.rng, self.n_input, self.n_hidden, self.y,
                              self.E, self.ymask, self.is_train, self.dropout)
        elif self.cell == 'lstm':
            recurrent_x = LSTM(self.rng, self.n_input, self.n_hidden, self.x,
                               self.E, self.xmask, self.is_train, self.dropout)

            recurrent_y = LSTM(self.rng, self.n_input, self.n_hidden, self.y,
                               self.E, self.ymask, self.is_train, self.dropout)
        else:
            raise ValueError('unknown cell type: {}'.format(self.cell))
        log.info('building the sim matrix....')
        sim_layer = Similarity(recurrent_x.activation,
                               recurrent_y.activation,
                               metrics=self.sim)

        log.info('building convolution pooling layer....')
        conv_pool_layer = ConvPool(
            input=sim_layer.activation,
            filter_shape=(2, 1, 3, 3),  # (feature_maps, 1, filter_h, filter_w)
            input_shape=(self.batch_size, 1, 50, 50))
        projected_layer = basicLayer(conv_pool_layer.activation,
                                     input_shape=1152)
        rav_cost = T.nnet.binary_crossentropy(projected_layer.activation,
                                              self.label)
        cost = T.mean(rav_cost)
        acc = T.eq(projected_layer.activation > 0.5, self.label)
        log.info('cost calculated.....')

        self.params = [
            self.E,
        ]
        self.params += recurrent_x.params
        self.params += recurrent_y.params
        self.params += conv_pool_layer.params
        self.params += projected_layer.params

        lr = T.scalar('lr')
        gparams = [T.clip(T.grad(cost, p), -3, 3) for p in self.params]
        #gparams = [T.grad(cost, p) for p in self.params]

        if self.optimizer == 'sgd':
            updates = sgd(self.params, gparams, lr)
        elif self.optimizer == 'adam':
            updates = adam(self.params, gparams, lr)
        elif self.optimizer == 'rmsprop':
            updates = rmsprop(self.params, gparams, lr)
        else:
            raise ValueError('unknown optimizer: {}'.format(self.optimizer))

        log.info('gradient calculated.....')

        self.train = theano.function(
            inputs=[self.x, self.xmask, self.y, self.ymask, self.label, lr],
            outputs=[cost, acc],
            updates=updates,
            givens={self.is_train: np.cast['int32'](1)})

        self.predict = theano.function(
            inputs=[self.x, self.xmask, self.y, self.ymask, self.label],
            outputs=[rav_cost, acc],
            givens={self.is_train: np.cast['int32'](0)})

        self.test = theano.function(
            inputs=[self.x, self.xmask, self.y, self.ymask],
            outputs=projected_layer.activation,
            givens={self.is_train: np.cast['int32'](0)})
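A sketch of how the three compiled functions above might be called, matching their inputs= lists; the variable shapes and dtypes are assumptions (int32 token-index matrices with matching float masks), and `model` stands for an instance of the class containing build():

cost, acc = model.train(x, xmask, y, ymask, label, 0.001)   # one update step
batch_cost, acc = model.predict(x, xmask, y, ymask, label)  # per-pair losses
scores = model.test(x, xmask, y, ymask)                     # sigmoid outputs in [0, 1]
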
Example #10
    def _init_Sim(self):
        self.sim = Similarity()
        self.shelf_db = shelve.open('image_db.dat')
Example #11
#responseDoc = fileName5

#requestDoc = 'abstract.txt'
requestDoc = fileName5

readDoc = ReadDocument()
print("Reading input document")
fileContent = readDoc.readFile(requestDoc)
keywords = readDoc.getKeywords(fileContent)

#print(keywords)
#print(len(keywords))

webSearch = WebSearch()
print("crawling the web")
webSearch.google_search(keywords)
#webSearch.trialSearch()

checkSimilarity = Similarity()
print("checking similarity")
value = checkSimilarity.similarValue(requestDoc, responseDoc)
# Clear the response document after scoring
with open(responseDoc, 'r+') as f:
    f.truncate()
print("----------SIMILARITY-----------")
#print(value)
resultValue = str(round((value * 100), 2)) + "%"
#print(str(round((value*100),2))+"%")
print(resultValue)
with open('similarityValue.txt', 'w', encoding='utf-8') as out_file:
    out_file.write(str(resultValue))
print("-------------------------------")