def __init__(self, TM, recreate = False, predict_ratio = 1.0): """Create object from dataset using TM as trustmetric. predict_ratio is the part of the edges that will randomly be picked for prediction. NB: The save format for wiki, is different from the save format for Advogato, and other datasets""" Network.__init__(self, make_base_path = False,silent=TM.dataset.silent) self.name = 'WikiCalcGraph' self.TM = TM self.dataset = dataset = TM.dataset self.predict_ratio = predict_ratio self._cachedict = {'network':'Pred'+self.dataset._name(),'date':self.dataset.date,'lang':self.dataset.lang} self.start_time = time.time() if hasattr(dataset, "filepath"): self.path = os.path.join(os.path.split(dataset.filepath)[0], path_name(TM)) tp=relative_path(self.path,'datasets' ) if tp: self.path = os.path.join( os.path.split(tp[0])[0] ,'shared_datasets',tp[1]) if hasattr(TM,"noneToValue") and TM.noneToValue: self.path = os.path.join(self.path,'noneTo'+TM.defaultPredict) if not os.path.exists(self.path): mkpath(self.path) self.__set_filepath() self.basePath,self.relpath = relative_path( self.filepath, 'shared_datasets' ) self.url = os.path.join( 'http://www.trustlet.org/trustlet_dataset_svn/', os.path.split( self.relpath )[0] ) self.filename = os.path.split(self.filepath)[1] if not recreate and os.path.exists(self.filepath): #if in cache file doesn't exist the dataset with the right keys #create it and save it if not self._readCache(self.filepath): sys.stderr.write( "I can't find dataset with threshold "+self.dataset.threshold+" and bots set to"+self.dataset.bots+"\n" ) graph = self._generate() self._writeCache(graph) else: graph = self._generate() self._writeCache(graph) self._set_arrays() self._prepare() if hasattr(self.TM, 'rescale') and self.TM.rescale: self._rescale() if not self.silent: print "Init took", hms(time.time() - self.start_time)
def __init__(self, TM, recreate = False, predict_ratio = 1.0): """Create object from dataset using TM as trustmetric. predict_ratio is the part of the edges that will randomly be picked for prediction.""" Network.__init__(self, make_base_path = False,silent=TM.dataset.silent) self.name = 'CalcGraph' self.TM = TM self.dataset = dataset = TM.dataset self.predict_ratio = predict_ratio self._cachedict = {'network':'Pred'+TM.dataset._name(), 'date':TM.dataset.date} self.start_time = time.time() if hasattr(dataset, "filepath"): path = os.path.join(os.path.split(dataset.filepath)[0], path_name(TM)) if dataset._name() == 'Weighted' or self._name() == '' or self._name() == 'Dummy' or self._name() =='Dummyweighted': self.path = path else: (home,rel)=relative_path(path,'datasets' ) self.path = os.path.join( os.path.split(home)[0] ,'shared_datasets',rel) #not necessary #if hasattr(TM,"noneToValue") and TM.noneToValue: # self.path = os.path.join(self.path,'noneTo'+TM.defaultPredict) mkpath(self.path) self.filepath = os.path.join(self.path, get_name(self) + '.dot') #splits path, PGPath is the path to c2 dataset, relative to datasets folder, #relpath is the absolutepath to datasets folder if 'shared_datasets' in self.filepath: self.basePath,relpath = relative_path( self.filepath, 'shared_datasets' ) elif 'datasets' in self.filepath: self.basePath,relpath = relative_path( self.filepath, 'datasets' ) else: raise Exception("Malformed path ("+self.filepath+")! it must contain 'datasets' or 'shared_datasets' folder") self.relpath = relpath #path to dataset relative to svn directory self.url = os.path.join( 'http://www.trustlet.org/trustlet_dataset_svn/', os.path.split( relpath )[0] ) self.filename = os.path.split(self.filepath)[1] if not recreate and (os.path.exists(self.filepath) or os.path.exists(self.filepath+'.bz2')): self._read_dot(self.filepath) else: graph = self._generate() self._write_pred_graph_dot(graph) self._set_arrays() self._prepare() if hasattr(self.TM, 'rescale') and self.TM.rescale: self._rescale() if not self.dataset.silent: print "Init took", hms(time.time() - self.start_time)
def __init__(self, TM, recreate=False, predict_ratio=1.0): """Create object from dataset using TM as trustmetric. predict_ratio is the part of the edges that will randomly be picked for prediction. NB: The save format for wiki, is different from the save format for Advogato, and other datasets""" Network.__init__(self, make_base_path=False, silent=TM.dataset.silent) self.name = 'WikiCalcGraph' self.TM = TM self.dataset = dataset = TM.dataset self.predict_ratio = predict_ratio self._cachedict = { 'network': 'Pred' + self.dataset._name(), 'date': self.dataset.date, 'lang': self.dataset.lang } self.start_time = time.time() if hasattr(dataset, "filepath"): self.path = os.path.join( os.path.split(dataset.filepath)[0], path_name(TM)) tp = relative_path(self.path, 'datasets') if tp: self.path = os.path.join( os.path.split(tp[0])[0], 'shared_datasets', tp[1]) if hasattr(TM, "noneToValue") and TM.noneToValue: self.path = os.path.join(self.path, 'noneTo' + TM.defaultPredict) if not os.path.exists(self.path): mkpath(self.path) self.__set_filepath() self.basePath, self.relpath = relative_path( self.filepath, 'shared_datasets') self.url = os.path.join( 'http://www.trustlet.org/trustlet_dataset_svn/', os.path.split(self.relpath)[0]) self.filename = os.path.split(self.filepath)[1] if not recreate and os.path.exists(self.filepath): #if in cache file doesn't exist the dataset with the right keys #create it and save it if not self._readCache(self.filepath): sys.stderr.write("I can't find dataset with threshold " + self.dataset.threshold + " and bots set to" + self.dataset.bots + "\n") graph = self._generate() self._writeCache(graph) else: graph = self._generate() self._writeCache(graph) self._set_arrays() self._prepare() if hasattr(self.TM, 'rescale') and self.TM.rescale: self._rescale() if not self.silent: print "Init took", hms(time.time() - self.start_time)
def __init__(self, TM, recreate=False, predict_ratio=1.0): """Create object from dataset using TM as trustmetric. predict_ratio is the part of the edges that will randomly be picked for prediction.""" Network.__init__(self, make_base_path=False, silent=TM.dataset.silent) self.name = 'CalcGraph' self.TM = TM self.dataset = dataset = TM.dataset self.predict_ratio = predict_ratio self._cachedict = { 'network': 'Pred' + TM.dataset._name(), 'date': TM.dataset.date } self.start_time = time.time() if hasattr(dataset, "filepath"): path = os.path.join( os.path.split(dataset.filepath)[0], path_name(TM)) if dataset._name() == 'Weighted' or self._name( ) == '' or self._name() == 'Dummy' or self._name( ) == 'Dummyweighted': self.path = path else: (home, rel) = relative_path(path, 'datasets') self.path = os.path.join( os.path.split(home)[0], 'shared_datasets', rel) #not necessary #if hasattr(TM,"noneToValue") and TM.noneToValue: # self.path = os.path.join(self.path,'noneTo'+TM.defaultPredict) mkpath(self.path) self.filepath = os.path.join(self.path, get_name(self) + '.dot') #splits path, PGPath is the path to c2 dataset, relative to datasets folder, #relpath is the absolutepath to datasets folder if 'shared_datasets' in self.filepath: self.basePath, relpath = relative_path(self.filepath, 'shared_datasets') elif 'datasets' in self.filepath: self.basePath, relpath = relative_path(self.filepath, 'datasets') else: raise Exception( "Malformed path (" + self.filepath + ")! it must contain 'datasets' or 'shared_datasets' folder" ) self.relpath = relpath #path to dataset relative to svn directory self.url = os.path.join( 'http://www.trustlet.org/trustlet_dataset_svn/', os.path.split(relpath)[0]) self.filename = os.path.split(self.filepath)[1] if not recreate and (os.path.exists(self.filepath) or os.path.exists(self.filepath + '.bz2')): self._read_dot(self.filepath) else: graph = self._generate() self._write_pred_graph_dot(graph) self._set_arrays() self._prepare() if hasattr(self.TM, 'rescale') and self.TM.rescale: self._rescale() if not self.dataset.silent: print "Init took", hms(time.time() - self.start_time)