def p_comparison(p):
    """comparison : cterm EQ cterm
                  | cterm NOTEQ cterm
                  | cterm LT cterm
                  | cterm GT cterm
                  | cterm LTEQ cterm
                  | cterm GTEQ cterm
                  | cterm LIKE STRING_LITERAL
                  | cterm CLIKE STRING_LITERAL
                  | cterm IS NULL
                  | cterm IS NOT NULL"""
    # The docstring above is the grammar specification that PLY reads when
    # the parser is built, so the documentation for this rule lives here.
    #
    # Builds the Comparison node for one comparison production and stores
    # it in p[0]:
    #   * binary operators keep both cterms as operands,
    #   * LIKE / CLIKE wrap the string literal in a CompareTerm,
    #   * IS NULL / IS NOT NULL have no right-hand operand (None).
    if len(p) != 4:
        # "cterm IS NOT NULL" is the only alternative with four symbols on
        # the right-hand side (len(p) == 5).
        p[0] = Comparison(Comparison.IS_NOT_NULL, p[1], None)
        return

    operator = p.slice[2].type
    if operator == 'EQ':
        p[0] = Comparison(Comparison.EQ, p[1], p[3])
    elif operator == 'NOTEQ':
        p[0] = Comparison(Comparison.NOTEQ, p[1], p[3])
    elif operator == 'LT':
        p[0] = Comparison(Comparison.LT, p[1], p[3])
    elif operator == 'GT':
        p[0] = Comparison(Comparison.GT, p[1], p[3])
    elif operator == 'LTEQ':
        p[0] = Comparison(Comparison.LTEQ, p[1], p[3])
    elif operator == 'GTEQ':
        p[0] = Comparison(Comparison.GTEQ, p[1], p[3])
    elif operator == 'LIKE':
        p[0] = Comparison(Comparison.LIKE, p[1], CompareTerm(p[3]))
    elif operator == 'CLIKE':
        p[0] = Comparison(Comparison.CLIKE, p[1], CompareTerm(p[3]))
    elif operator == 'IS':
        # Previously this branch tested 'CLIKE' a second time and was
        # therefore unreachable, so "cterm IS NULL" never yielded IS_NULL.
        p[0] = Comparison(Comparison.IS_NULL, p[1], None)
def __init__(self, sigma, wm_c, max_ar, sem_dim):
    """Initialise the Comparison base with ``sigma`` and build the graph.

    ``wm_c``, ``max_ar`` and ``sem_dim`` are stored on private attributes,
    the number of searches is derived as ``wm_c!`` and finally the graph is
    created.
    """
    Comparison.__init__(self, sigma)
    self.__wm_c, self.__max_ar, self.__sem_dim = wm_c, max_ar, sem_dim
    # factorial(wm_c), coerced to a plain int.
    self.__n_search = int(spm.factorial(wm_c))
    self.__create_graph()
def test_match(self): """Тест1""" data = [ (u'все включено', u'скачать все включено сейчас', 1.0), (u'все включено', u'All inclusive, или Всё включено', 1.0), (u'все включено', u'All inclusive, или Всё включено (2011) » Новое Кино\ смотреть онлайн скачать бесплатно', 1.0), (u'все включено', u'мотреть фильм Все включено онлайн бесплатно\ без регистрации', 1.0), (u'все включено', u'All inclusive или Все включено (2011). Фильм.', 1.0), (u'все включено', u'Все включено! (2011) - All inclusive или Все включено! - информация о фильме - российские фильмы и сериалы - Кино-Театр.РУ', 1.0), (u'все включено', u'ВСЕ ВКЛЮЧЕНО 2011 » СМОТРЕТЬ ОНЛАЙН БЕСПЛАТНО', 1.0), (u'все включено', u'Фильм All inclusive, или Всё включено смотреть онлайн бесплатно в хорошем качестве', 1.0), (u'все включено', u'Все включено Онлайн фильм, Смотреть фильмы онлайн бесплатно, Кино онлайн', 1.0), (u'все включено', u'Смотреть онлайн All inclusive, или Всё включено | Комедии | Фильм All inclusive, или Всё включено смотреть бесплатно на Films-Online-net.ru', 1.0), (u'все включено', u'Фильм «Все включено», Эдуард Радзюкевич, Михаил Беспало', 1.0), (u'все включено', u'Фильм All inclusive, или Всё включено отзывы и рецензия', 1.0), (u'все включено', u'ильм All inclusive, или Всё включено смотреть онлайн бесплатно в хорошем hd качестве', 1.0) ] comparison = Comparison() for entry in data: val = comparison.match(entry[0], entry[1]) print entry[0], entry[1], val self.assertTrue(val >= entry[2])
def compare_all_masters(self):
    """Compare this host against every master and return the summary.

    For each entry of ``self.masters`` the first database of the master is
    compared with the first database of this host. The per-master results
    are attached to the dict returned by ``self.host_info()`` under
    ``'masters'``, together with ``self.token`` under ``'token'``.
    """
    results = []
    for master in self.masters:
        pairing = Comparison(master, self)
        master_db = master.databases[0]['name']
        own_db = self.databases[0]['name']
        print("DB1: {}\tDB2: {}".format(master_db, own_db))
        results.append(pairing.get_comparison(master_db, own_db))

    summary = self.host_info()
    summary['token'] = self.token
    summary['masters'] = results
    return summary
def compare_books(self, user_id):
    """Compare books between the authenticated user and another.

    Args:
        user_id: Identifier of the other user. Both strings and integers
            are accepted.

    Returns:
        A ``Comparison`` built from the ``'compare'`` entry of the response.

    Raises:
        GoodreadsSessionError: If there is no authenticated session.
    """
    if not self.session:
        raise GoodreadsSessionError("No authenticated session.")
    # Build the path with %-formatting instead of ``+`` so that an integer
    # id no longer raises TypeError; string ids give the same path as before.
    endpoint = 'user/compare/%s' % (user_id,)
    data_dict = self.session.get(endpoint, {'format': 'xml'})
    return Comparison(data_dict['compare'])
def from_dict(cls, kv):
    """Build an instance of ``cls`` from the mapping ``kv``.

    ``kv['date']`` (when present and not None) is parsed with the
    module-level ``date_format``; ``kv['comment']`` is passed through
    unchanged. The comparison part is built by ``Comparison.from_dict(kv)``.
    """
    comment = kv.get('comment')
    date_text = kv.get('date')
    date = (datetime.strptime(date_text, date_format)
            if date_text is not None else None)
    # Imported at call time, as in the original -- presumably to avoid a
    # circular import between the two modules (confirm).
    from comparison import Comparison
    return cls(Comparison.from_dict(kv), date, comment)
def __init__(self, articles):
    """Pair up articles that share enough keywords.

    Every article is checked against every other article; a pair is
    recorded as a ``Comparison`` when the two keyword sets share at least
    five entries and the first article of the pair does not already take
    part in a recorded comparison.
    """
    # All the articles delivered by the web scraper.
    self.articles = articles
    # Pairs of articles that were found to be similar.
    self.comparisons = []
    # Keywords that appeared as matches (not filled in by this constructor).
    self.keywords = []

    for article in self.articles:
        for otherArticle in self.articles:
            # The same article would be a perfect match with itself.
            if article.title == otherArticle.title:
                continue

            shared = set(article.keywords) & set(otherArticle.keywords)
            # NOTE(review): the original comment says a threshold of 4 was
            # chosen by comparing real similar articles, but the code
            # requires at least 5 shared keywords -- confirm which is meant.
            if len(shared) < 5:
                continue

            # Skip the pair when this article already appears in a recorded
            # comparison, to keep duplicates out.
            seen = any(
                existing.getFirstArticle().title == article.title
                or existing.getSecondArticle().title == article.title
                for existing in self.comparisons
            )
            if not seen:
                self.comparisons.append(
                    Comparison(article, otherArticle, shared))
def NAND(*args):
    """Return ``Comparison().NAND(*args)`` using a freshly created instance."""
    gate = Comparison()
    return gate.NAND(*args)
def OR(*args):
    """Return ``Comparison().OR(*args)`` using a freshly created instance."""
    gate = Comparison()
    return gate.OR(*args)
def AND(*args):
    """Return ``Comparison().AND(*args)`` using a freshly created instance."""
    gate = Comparison()
    return gate.AND(*args)
def NOR(*args):
    """Return ``Comparison().NOR(*args)`` using a freshly created instance."""
    gate = Comparison()
    return gate.NOR(*args)
# Script: load a Poincare model and a text database, read one target text and
# print the database texts that Comparison.comparison reports as similar.

# Threshold handed to Comparison.comparison below.
# NOTE(review): no value is assigned here, so the script cannot run until a
# threshold is filled in.
border=
# Download the model trained by train.py
#from train import train
#model=Train.train()
# When you want to save it
#model.save("name.gz")
# Load the Poincare model (epoch=100) from the folder
from gensim.models.poincare import PoincareModel,PoincareRelations
model=PoincareModel.load("../data/poincare_1.gz")
# Build the database of texts to compare against
from database import Database
database,id2doc=Database.database()
# Read the target text (the first entry returned by glob is used)
import glob
path=glob.glob("../data/target_data/*")
path=path[0]
f = open(path)
text=f.read()
f.close()
# From the inputs (target text, model, text database, threshold) produce the
# output (similar texts whose similarity is at or below the threshold, in
# ascending order)
from comparison import Comparison
ind=Comparison.comparison(text,database,model,border)
# Print the document belonging to each returned index.
for i in ind:
    print(id2doc[i])
def score(self, spaceagg, timeagg, store_minimum=False, pp_model=None, quantile=''):
    """
    Score the aligned forecasts for one (spaceagg, timeagg) entry of the log.

    Reads the obs and clim, and makes a Comparison object which computes the
    scores in the dask dataframe. This dask dataframe is exported.

    Steps, as performed below:
    - Recollect the forecast-to-observation alignment named in the log.
    - Load the climatology named in the log, and the model climatology as
      well when the log holds a name for it.
    - When ``pp_model`` is supplied: fit the post-processing models, or load
      earlier fits when the log already holds a fit name. The fit is the
      same regardless of the quantile, so it is done only once.
    - Score: Brier score when ``quantile`` is a float; otherwise CRPS when
      ``self.newvar`` is None or 'anom', and Brier score for any other
      ``newvar``.

    If there are no quantiles to predict or binary variable, we force
    equidistant sampling (random = True led to overestimations of the crps).

    Returns the value of the final ``comp.export(...)`` call (described in
    the original documentation as the intermediate filenames of the raw,
    climatological and corrected scores).
    """
    alignment = ForecastToObsAlignment(season=self.season, cycle=self.cycle)
    alignment.recollect(booksname=self.log.loc[(spaceagg, timeagg), ('booksname', '')])

    climatology = Climatology(
        self.basevar,
        **{'name': self.log.loc[(spaceagg, timeagg), ('climname', quantile)]})
    climatology.localclim()  # loading in this case. Creation was done in the makeclim method.

    # Supply model quantile climatology if that was computed earlier. Will be
    # preferred for the raw Brier scoring in the Comparison class.
    if not self.log.loc[(spaceagg, timeagg), ('modelclimname', [quantile])].isna().any():
        modelclimatology = ModelClimatology(
            cycle=self.cycle,
            variable=self.basevar,
            **{'name': self.log.loc[(spaceagg, timeagg), ('modelclimname', quantile)]})
        modelclimatology.local_clim()
        assert self.newvar == 'anom', 'This modelclimatology has likely no adapted units, only when anomalies the quantiles in Kelvin will be compatible with the aligned forecast anomalies in Celsius.'
    else:
        modelclimatology = None

    comp = Comparison(alignment=alignment,
                      climatology=climatology,
                      modelclimatology=modelclimatology)

    # Fitting or accepting external fits (meaning the column is already filled):
    if not pp_model is None:
        # A non-string entry means that no fit name has been stored yet.
        if not isinstance(self.log.loc[(spaceagg, timeagg), ('externalfits', quantile)], str):
            comp.fit_pp_models(pp_model=pp_model, groupers=['leadtime', 'clustid'])
            firstfitname = comp.export(fits=True, frame=False)
            # Written to every quantile column at once. Specifically useful
            # for the looping over quantiles.
            self.log.loc[(spaceagg, timeagg), ('externalfits', slice(None))] = firstfitname
        else:
            fitname = self.log.loc[(spaceagg, timeagg), ('externalfits', quantile)]
            print('loading fit from:', fitname)
            # Loading of the fits of the first quantile.
            comp.fits = dd.read_hdf(comp.basedir + fitname + '.h5', key='fits')
            comp.fitgroupers = ['leadtime', 'clustid']

    # Going to the scoring.
    if isinstance(quantile, float):
        if not pp_model is None:
            comp.make_pp_forecast(pp_model=pp_model)
        comp.brierscore()
    else:
        if not pp_model is None:
            # Equidistant sampling (random=False); the number of members is
            # only passed on for an NGR model.
            comp.make_pp_forecast(pp_model=pp_model,
                                  random=False,
                                  n_members=self.ndraws if isinstance(pp_model, NGR) else None)
            # NOTE(review): this export of the predictions is assumed to
            # belong to the pp_model branch -- confirm the nesting.
            comp.export(fits=False, frame=False, preds=True)
        if (self.newvar is None) or (self.newvar == 'anom'):
            comp.crpsscore()
        else:  # Meaning a custom binary predictand
            comp.brierscore()

    scorefile = comp.export(fits=False, frame=True, store_minimum=store_minimum)
    return (scorefile)