Example #1
0
    def __init__(self, TM, recreate = False, predict_ratio = 1.0):
        """Create object from dataset using TM as trustmetric.
        predict_ratio is the part of the edges that will randomly be
        picked for prediction.
        NB: The save format for wiki, is different from the save format
            for Advogato, and other datasets"""
        Network.__init__(self, make_base_path = False,silent=TM.dataset.silent)
        self.name = 'WikiCalcGraph'
        self.TM = TM
        self.dataset = dataset = TM.dataset
        self.predict_ratio = predict_ratio
        self._cachedict = {'network':'Pred'+self.dataset._name(),'date':self.dataset.date,'lang':self.dataset.lang}

        self.start_time = time.time()
        
        if hasattr(dataset, "filepath"):
            self.path = os.path.join(os.path.split(dataset.filepath)[0],
                                     path_name(TM))

            tp=relative_path(self.path,'datasets' )
            if tp:
                self.path = os.path.join(  os.path.split(tp[0])[0]  ,'shared_datasets',tp[1])


            if hasattr(TM,"noneToValue") and TM.noneToValue:
                self.path = os.path.join(self.path,'noneTo'+TM.defaultPredict)
            if not os.path.exists(self.path):
                mkpath(self.path)
            
            self.__set_filepath() 
                
            self.basePath,self.relpath = relative_path( self.filepath, 'shared_datasets' )

            self.url = os.path.join( 'http://www.trustlet.org/trustlet_dataset_svn/', os.path.split( self.relpath )[0] )
            self.filename = os.path.split(self.filepath)[1]
                        
            if not recreate and os.path.exists(self.filepath):
                #if in cache file doesn't exist the dataset with the right keys
                #create it and save it
                if not self._readCache(self.filepath):
                    sys.stderr.write( "I can't find dataset with threshold "+self.dataset.threshold+" and bots set to"+self.dataset.bots+"\n" )
                    graph = self._generate()
                    self._writeCache(graph)
                
            else:
                graph = self._generate()
                self._writeCache(graph)
                
            self._set_arrays()
            self._prepare()
            if hasattr(self.TM, 'rescale') and self.TM.rescale:
                self._rescale()

        if not self.silent:
            print "Init took", hms(time.time() - self.start_time)
Example #2
0
    def __init__(self, TM, recreate = False, predict_ratio = 1.0):
        """Create object from dataset using TM as trustmetric.
        predict_ratio is the part of the edges that will randomly be
        picked for prediction."""
        Network.__init__(self, make_base_path = False,silent=TM.dataset.silent)
        self.name = 'CalcGraph'
        self.TM = TM
        self.dataset = dataset = TM.dataset
        self.predict_ratio = predict_ratio
        self._cachedict = {'network':'Pred'+TM.dataset._name(), 'date':TM.dataset.date}

        self.start_time = time.time()
        
        if hasattr(dataset, "filepath"):
            path = os.path.join(os.path.split(dataset.filepath)[0], path_name(TM))
            
            if dataset._name() == 'Weighted' or self._name() == '' or self._name() == 'Dummy' or self._name() =='Dummyweighted':
                self.path = path
            else:
                (home,rel)=relative_path(path,'datasets' )
                self.path = os.path.join(  os.path.split(home)[0]  ,'shared_datasets',rel)

            #not necessary
            #if hasattr(TM,"noneToValue") and TM.noneToValue:
            #    self.path = os.path.join(self.path,'noneTo'+TM.defaultPredict)
            mkpath(self.path)
            self.filepath = os.path.join(self.path, 
                                         get_name(self) + '.dot')
        
            #splits path, PGPath is the path to c2 dataset, relative to datasets folder,
            #relpath is the absolutepath to datasets folder
            if 'shared_datasets' in self.filepath:
                self.basePath,relpath = relative_path( self.filepath, 'shared_datasets' )
            elif 'datasets' in self.filepath:
                self.basePath,relpath = relative_path( self.filepath, 'datasets' )
            else:
                raise Exception("Malformed path ("+self.filepath+")! it must contain 'datasets' or 'shared_datasets' folder")

            self.relpath = relpath #path to dataset relative to svn directory
            self.url = os.path.join( 'http://www.trustlet.org/trustlet_dataset_svn/', os.path.split( relpath )[0] ) 
            self.filename = os.path.split(self.filepath)[1]

            if not recreate and (os.path.exists(self.filepath) or os.path.exists(self.filepath+'.bz2')):
                self._read_dot(self.filepath)
            else:
                graph = self._generate()
                self._write_pred_graph_dot(graph)
                
            self._set_arrays()
            self._prepare()
            if hasattr(self.TM, 'rescale') and self.TM.rescale:
                self._rescale()

        if not self.dataset.silent:
            print "Init took", hms(time.time() - self.start_time)
Example #3
0
    def __init__(self, TM, recreate=False, predict_ratio=1.0):
        """Create object from dataset using TM as trustmetric.
        predict_ratio is the part of the edges that will randomly be
        picked for prediction.
        NB: The save format for wiki, is different from the save format
            for Advogato, and other datasets"""
        Network.__init__(self, make_base_path=False, silent=TM.dataset.silent)
        self.name = 'WikiCalcGraph'
        self.TM = TM
        self.dataset = dataset = TM.dataset
        self.predict_ratio = predict_ratio
        self._cachedict = {
            'network': 'Pred' + self.dataset._name(),
            'date': self.dataset.date,
            'lang': self.dataset.lang
        }

        self.start_time = time.time()

        if hasattr(dataset, "filepath"):
            self.path = os.path.join(
                os.path.split(dataset.filepath)[0], path_name(TM))

            tp = relative_path(self.path, 'datasets')
            if tp:
                self.path = os.path.join(
                    os.path.split(tp[0])[0], 'shared_datasets', tp[1])

            if hasattr(TM, "noneToValue") and TM.noneToValue:
                self.path = os.path.join(self.path,
                                         'noneTo' + TM.defaultPredict)
            if not os.path.exists(self.path):
                mkpath(self.path)

            self.__set_filepath()

            self.basePath, self.relpath = relative_path(
                self.filepath, 'shared_datasets')

            self.url = os.path.join(
                'http://www.trustlet.org/trustlet_dataset_svn/',
                os.path.split(self.relpath)[0])
            self.filename = os.path.split(self.filepath)[1]

            if not recreate and os.path.exists(self.filepath):
                #if in cache file doesn't exist the dataset with the right keys
                #create it and save it
                if not self._readCache(self.filepath):
                    sys.stderr.write("I can't find dataset with threshold " +
                                     self.dataset.threshold +
                                     " and bots set to" + self.dataset.bots +
                                     "\n")
                    graph = self._generate()
                    self._writeCache(graph)

            else:
                graph = self._generate()
                self._writeCache(graph)

            self._set_arrays()
            self._prepare()
            if hasattr(self.TM, 'rescale') and self.TM.rescale:
                self._rescale()

        if not self.silent:
            print "Init took", hms(time.time() - self.start_time)
Example #4
0
    def __init__(self, TM, recreate=False, predict_ratio=1.0):
        """Create object from dataset using TM as trustmetric.
        predict_ratio is the part of the edges that will randomly be
        picked for prediction."""
        Network.__init__(self, make_base_path=False, silent=TM.dataset.silent)
        self.name = 'CalcGraph'
        self.TM = TM
        self.dataset = dataset = TM.dataset
        self.predict_ratio = predict_ratio
        self._cachedict = {
            'network': 'Pred' + TM.dataset._name(),
            'date': TM.dataset.date
        }

        self.start_time = time.time()

        if hasattr(dataset, "filepath"):
            path = os.path.join(
                os.path.split(dataset.filepath)[0], path_name(TM))

            if dataset._name() == 'Weighted' or self._name(
            ) == '' or self._name() == 'Dummy' or self._name(
            ) == 'Dummyweighted':
                self.path = path
            else:
                (home, rel) = relative_path(path, 'datasets')
                self.path = os.path.join(
                    os.path.split(home)[0], 'shared_datasets', rel)

            #not necessary
            #if hasattr(TM,"noneToValue") and TM.noneToValue:
            #    self.path = os.path.join(self.path,'noneTo'+TM.defaultPredict)
            mkpath(self.path)
            self.filepath = os.path.join(self.path, get_name(self) + '.dot')

            #splits path, PGPath is the path to c2 dataset, relative to datasets folder,
            #relpath is the absolutepath to datasets folder
            if 'shared_datasets' in self.filepath:
                self.basePath, relpath = relative_path(self.filepath,
                                                       'shared_datasets')
            elif 'datasets' in self.filepath:
                self.basePath, relpath = relative_path(self.filepath,
                                                       'datasets')
            else:
                raise Exception(
                    "Malformed path (" + self.filepath +
                    ")! it must contain 'datasets' or 'shared_datasets' folder"
                )

            self.relpath = relpath  #path to dataset relative to svn directory
            self.url = os.path.join(
                'http://www.trustlet.org/trustlet_dataset_svn/',
                os.path.split(relpath)[0])
            self.filename = os.path.split(self.filepath)[1]

            if not recreate and (os.path.exists(self.filepath)
                                 or os.path.exists(self.filepath + '.bz2')):
                self._read_dot(self.filepath)
            else:
                graph = self._generate()
                self._write_pred_graph_dot(graph)

            self._set_arrays()
            self._prepare()
            if hasattr(self.TM, 'rescale') and self.TM.rescale:
                self._rescale()

        if not self.dataset.silent:
            print "Init took", hms(time.time() - self.start_time)