def p_comparison(p):
    """comparison : cterm EQ cterm
                  | cterm NOTEQ cterm
                  | cterm LT cterm
                  | cterm GT cterm
                  | cterm LTEQ cterm
                  | cterm GTEQ cterm
                  | cterm LIKE STRING_LITERAL
                  | cterm CLIKE STRING_LITERAL
                  | cterm IS NULL
                  | cterm IS NOT NULL"""
    # NOTE: the docstring above is the grammar rule itself (PLY-style yacc
    # reads it at parser-build time), so it must contain only productions,
    # one alternative per line. Keep prose in comments like this one.
    #
    # Builds the Comparison node for the matched alternative and stores it
    # in p[0]. p[1] is always the left-hand cterm.

    if len(p) != 4:
        # "cterm IS NOT NULL" is the only alternative with four
        # right-hand-side symbols (len(p) == 5).
        p[0] = Comparison(Comparison.IS_NOT_NULL, p[1], None)
        return

    # For every three-symbol alternative the operator token sits in slot 2.
    operator = p.slice[2].type

    if operator == 'IS':
        # "cterm IS NULL". This branch previously re-tested 'CLIKE' and was
        # therefore unreachable, so IS NULL never produced a node.
        p[0] = Comparison(Comparison.IS_NULL, p[1], None)
    elif operator in ('LIKE', 'CLIKE'):
        # Pattern operators take a raw STRING_LITERAL on the right, which is
        # wrapped in a CompareTerm so both operands have the same type.
        p[0] = Comparison(getattr(Comparison, operator), p[1], CompareTerm(p[3]))
    elif operator in ('EQ', 'NOTEQ', 'LT', 'GT', 'LTEQ', 'GTEQ'):
        # Plain binary operators: the token type name matches the name of
        # the corresponding Comparison constant.
        p[0] = Comparison(getattr(Comparison, operator), p[1], p[3])
def compare_books(self, user_id):
    """Compare books between the authenticated user and another user.

    Requests ``user/compare/<user_id>`` through ``self.session`` in XML
    format and wraps the ``'compare'`` entry of the response in a
    ``Comparison``.

    Args:
        user_id: Id of the other user; it is concatenated onto the
            endpoint path, so it must be a string.

    Raises:
        GoodreadsSessionError: If ``self.session`` is not set.
    """
    if self.session:
        endpoint = 'user/compare/' + user_id
        response = self.session.get(endpoint, {'format': 'xml'})
        return Comparison(response['compare'])
    raise GoodreadsSessionError("No authenticated session.")
def compare_all_masters(self):
    """Compare this host against each entry of ``self.masters``.

    For every master a ``Comparison(master, self)`` is built and asked to
    compare the first database of the master with the first database of
    this host; the two database names are printed along the way.

    Returns:
        The dict returned by ``self.host_info()``, extended with
        ``'token'`` (``self.token``) and ``'masters'`` (the list of
        per-master comparison results, in ``self.masters`` order).
    """

    def _compare_with(master):
        # Build the comparison first, then resolve both database names, so
        # the evaluation order matches a straightforward loop body.
        comparison = Comparison(master, self)
        master_db = master.databases[0]['name']
        own_db = self.databases[0]['name']
        print("DB1: {}\tDB2: {}".format(master_db, own_db))
        return comparison.get_comparison(master_db, own_db)

    results = [_compare_with(master) for master in self.masters]

    # host_info() is queried only after all comparisons have run.
    summary = self.host_info()
    summary['token'] = self.token
    summary['masters'] = results
    return summary
def __init__(self, articles, min_shared_keywords=5):
    """Pair up articles that share enough keywords.

    Every article is checked against every other article; when two
    articles with different titles share at least ``min_shared_keywords``
    keywords, a ``Comparison`` for the pair is recorded in
    ``self.comparisons``.

    Args:
        articles: Re-iterable collection of articles from the web scraper.
            Each article must expose ``title`` and ``keywords``.
        min_shared_keywords: Minimum number of keywords two articles must
            have in common to count as similar. Defaults to 5, the value
            that used to be hard-coded here (an older comment mentioned 4,
            but the code required at least 5).
    """
    # Holds all the articles from the web scraper.
    self.articles = articles
    # Holds all the pairs of articles that are found to be similar.
    self.comparisons = []
    # Holds all the keywords that appeared as matches (not filled in here).
    self.keywords = []

    # Build each article's keyword set once instead of once per pair.
    keyword_sets = [set(article.keywords) for article in self.articles]

    # Check each article's keywords against all the other articles.
    for article, article_keys in zip(self.articles, keyword_sets):
        for other_article, other_keys in zip(self.articles, keyword_sets):
            # Ignore the same article, which would be a perfect match.
            if article.title == other_article.title:
                continue

            shared_keys = article_keys & other_keys
            if len(shared_keys) < min_shared_keywords:
                continue

            # Skip the pair when `article` already takes part in any
            # recorded comparison; this is what keeps (A, B) from being
            # added again as (B, A).
            # NOTE(review): this also means an article is paired with at
            # most one partner as the first article. Confirm whether that
            # is intended or whether the check should be per pair.
            already_matched = any(
                comparison.getFirstArticle().title == article.title
                or comparison.getSecondArticle().title == article.title
                for comparison in self.comparisons
            )
            if not already_matched:
                self.comparisons.append(
                    Comparison(article, other_article, shared_keys))
def NAND(*args):
    """Return ``Comparison().NAND(*args)`` using a freshly created Comparison."""
    comparison = Comparison()
    return comparison.NAND(*args)
def OR(*args):
    """Return ``Comparison().OR(*args)`` using a freshly created Comparison."""
    comparison = Comparison()
    return comparison.OR(*args)
def AND(*args):
    """Return ``Comparison().AND(*args)`` using a freshly created Comparison."""
    comparison = Comparison()
    return comparison.AND(*args)
def NOR(*args):
    """Return ``Comparison().NOR(*args)`` using a freshly created Comparison."""
    comparison = Comparison()
    return comparison.NOR(*args)
def score(self, spaceagg, timeagg, store_minimum=False, pp_model=None, quantile=''):
    """
    Score the aligned forecasts for one (spaceagg, timeagg) entry of the log.

    Read the obs and clim, and make a Comparison object which computes the
    scores in the dask dataframe. This dask dataframe is exported.
    Has a post-processing step if pp_model is supplied. The fit is the same
    regardless of the quantile, so it is done only once: the name of the
    exported fit is written to every 'externalfits' column of the log row
    and loaded from there on later calls.
    If there are no quantiles to predict or binary variable, we force
    equidistant sampling (random = True led to overestimations of the crps).

    Args:
        spaceagg: First part of the row key into self.log.
        timeagg: Second part of the row key into self.log.
        store_minimum: Forwarded unchanged to the final comp.export call.
        pp_model: Optional post-processing model. When given, fits are
            either computed or loaded, and a post-processed forecast is
            made before scoring.
        quantile: Second-level column key into self.log. A float selects
            Brier scoring; any other value selects CRPS scoring when
            self.newvar is None or 'anom', and Brier scoring otherwise.

    Returns:
        Whatever the final comp.export(fits=False, frame=True, ...) call
        returns (presumably the name of the exported score file; the
        earlier docstring spoke of a list of filenames, confirm against
        Comparison.export).

    Raises:
        AssertionError: If a model climatology is present in the log for
            this quantile while self.newvar is not 'anom'.
    """
    alignment = ForecastToObsAlignment(season=self.season, cycle=self.cycle)
    alignment.recollect(booksname=self.log.loc[(spaceagg, timeagg), ('booksname', '')])
    climatology = Climatology(
        self.basevar, **{
            'name': self.log.loc[(spaceagg, timeagg), ('climname', quantile)]
        })
    climatology.localclim(
    )  # Loading in this case. Creation was done in the makeclim method.
    if not self.log.loc[(spaceagg, timeagg), (
            'modelclimname', [quantile]
    )].isna().any(
    ):  # Supply model quantile climatology if that was computed earlier. Will be preferred for the raw brier scoring in the Comparison class.
        modelclimatology = ModelClimatology(
            cycle=self.cycle,
            variable=self.basevar,
            **{
                'name':
                self.log.loc[(spaceagg, timeagg), ('modelclimname', quantile)]
            })
        modelclimatology.local_clim()
        assert self.newvar == 'anom', 'This modelclimatology has likely no adapted units, only when anomalies the quantiles in Kelvin will be compatible with the aligned forecast anomalies in Celsius.'
    else:
        # No model climatology logged for this quantile.
        modelclimatology = None
    comp = Comparison(alignment=alignment,
                      climatology=climatology,
                      modelclimatology=modelclimatology)
    # Fitting or accepting external fits (meaning the column is already filled):
    if not pp_model is None:
        if not isinstance(
                self.log.loc[(spaceagg, timeagg), ('externalfits', quantile)],
                str):
            # No fit name stored yet: fit now, export the fits and remember the name.
            comp.fit_pp_models(pp_model=pp_model,
                               groupers=['leadtime', 'clustid'])
            firstfitname = comp.export(fits=True, frame=False)
            self.log.loc[(spaceagg, timeagg), (
                'externalfits', slice(None)
            )] = firstfitname  # Written to all columns; specifically useful for the looping over quantiles.
        else:
            fitname = self.log.loc[(spaceagg, timeagg),
                                   ('externalfits', quantile)]
            print('loading fit from:', fitname)
            comp.fits = dd.read_hdf(
                comp.basedir + fitname + '.h5',
                key='fits')  # Loading of the fits of the first quantile.
            # Same groupers as passed to fit_pp_models in the fitting branch.
            comp.fitgroupers = ['leadtime', 'clustid']
    # Going to the scoring.
    if isinstance(quantile, float):
        # A float quantile is scored with the Brier score.
        if not pp_model is None:
            comp.make_pp_forecast(pp_model=pp_model)
        comp.brierscore()
    else:
        if not pp_model is None:
            # random=False: see the docstring remark on equidistant sampling.
            # n_members is only supplied when pp_model is an NGR instance.
            comp.make_pp_forecast(pp_model=pp_model,
                                  random=False,
                                  n_members=self.ndraws if isinstance(
                                      pp_model, NGR) else None)
            # NOTE(review): nesting reconstructed from a flattened source;
            # the predictions export is assumed to belong to the pp_model
            # branch. Confirm against the original file.
            comp.export(fits=False, frame=False, preds=True)
        if (self.newvar is None) or (self.newvar == 'anom'):
            comp.crpsscore()
        else:  # Meaning a custom binary predictand
            comp.brierscore()
    scorefile = comp.export(fits=False,
                            frame=True,
                            store_minimum=store_minimum)
    return (scorefile)