Ejemplo n.º 1
0
	def __init__(self, dataset_dir, users_file='users.tsv.gz', excluded_users=None, default_location_source='geo-median'):
		"""Record the settings and file paths for the dataset in `dataset_dir`.

		The only file read here is the optional `dataset.json` settings
		file; every other attribute set below is just a path string.

		Args:
			dataset_dir: Directory that holds the dataset files.
			users_file: Name of the users file, joined onto `dataset_dir`.
			excluded_users: Stored unchanged on `self.excluded_users`. When
				omitted (or None), a new empty set is created for this
				instance.
			default_location_source: Inserted into the home-locations file
				name, `users.home-locations.<source>.tsv.gz`.
		"""
		settings_fname = os.path.join(dataset_dir, 'dataset.json')
		if os.path.exists(settings_fname):
			# Use a context manager so the settings file is closed as soon
			# as it has been parsed instead of being left to the GC.
			with open(settings_fname, 'r') as settings_file:
				self._settings = jsonlib.load(settings_file)
		else:
			self._settings = {}

		# prepare for all data
		self._dataset_dir = dataset_dir
		self._users_fname = os.path.join(dataset_dir, users_file)
		self._users_with_locations_fname = os.path.join(dataset_dir, 'users.home-locations.' + default_location_source + '.tsv.gz')
		self._mention_network_fname = os.path.join(dataset_dir, 'mention_network.elist')
		self._bi_mention_network_fname = os.path.join(dataset_dir, 'bi_mention_network.elist')

		# A `set()` default in the signature is evaluated once and would be
		# shared by every instance built without this argument, so create a
		# fresh set per instance instead.
		if excluded_users is None:
			excluded_users = set()
		self.excluded_users = excluded_users
Ejemplo n.º 2
0
	def __init__(self, dataset_dir, users_file=None):
		"""Record the settings and file paths for the dataset in `dataset_dir`.

		The only file read here is the optional `dataset.json` settings
		file; every other attribute set below is just a path string.

		Args:
			dataset_dir: Directory that holds the dataset files.
			users_file: Path of the users file, used exactly as given (it
				is NOT joined onto `dataset_dir`). When None, defaults to
				`users.json.gz` inside `dataset_dir`.
		"""
		settings_fname = os.path.join(dataset_dir, 'dataset.json')
		if os.path.exists(settings_fname):
			# Use a context manager so the settings file is closed as soon
			# as it has been parsed instead of being left to the GC.
			with open(settings_fname, 'r') as settings_file:
				self._settings = jsonlib.load(settings_file)
		else:
			self._settings = {}

		# prepare for all data
		self._posts_fname = os.path.join(dataset_dir, 'posts.json.gz')
		if users_file is None:
			self._users_fname = os.path.join(dataset_dir, 'users.json.gz')
		else:
			# NOTE: We should probably do some format verification here
			self._users_fname = users_file
		self._mention_network_fname = os.path.join(dataset_dir, 'mention_network.elist')
Ejemplo n.º 3
0
    def __init__(self, dataset_dir, users_file=None):
        """Record the settings and file paths for the dataset in `dataset_dir`.

        The only file read here is the optional `dataset.json` settings
        file; every other attribute set below is just a path string.

        Args:
            dataset_dir: Directory that holds the dataset files.
            users_file: Path of the users file, used exactly as given (it
                is NOT joined onto `dataset_dir`). When None, defaults to
                `users.json.gz` inside `dataset_dir`.
        """
        settings_fname = os.path.join(dataset_dir, 'dataset.json')
        if os.path.exists(settings_fname):
            # Use a context manager so the settings file is closed as soon
            # as it has been parsed instead of being left to the GC.
            with open(settings_fname, 'r') as settings_file:
                self._settings = jsonlib.load(settings_file)
        else:
            self._settings = {}

        # prepare for all data
        self._posts_fname = os.path.join(dataset_dir, 'posts.json.gz')
        if users_file is None:
            self._users_fname = os.path.join(dataset_dir, 'users.json.gz')
        else:
            # NOTE: We should probably do some format verification here
            self._users_fname = users_file
        self._mention_network_fname = os.path.join(dataset_dir,
                                                   'mention_network.elist')
Ejemplo n.º 4
0
    def __init__(self,
                 dataset_dir,
                 users_file='users.tsv.gz',
                 excluded_users=None,
                 default_location_source='geo-median'):
        """Record the settings and file paths for the dataset in `dataset_dir`.

        The only file read here is the optional `dataset.json` settings
        file; every other attribute set below is just a path string.

        Args:
            dataset_dir: Directory that holds the dataset files.
            users_file: Name of the users file, joined onto `dataset_dir`.
            excluded_users: Stored unchanged on `self.excluded_users`. When
                omitted (or None), a new empty set is created for this
                instance.
            default_location_source: Inserted into the home-locations file
                name, `users.home-locations.<source>.tsv.gz`.
        """
        settings_fname = os.path.join(dataset_dir, 'dataset.json')
        if os.path.exists(settings_fname):
            # Use a context manager so the settings file is closed as soon
            # as it has been parsed instead of being left to the GC.
            with open(settings_fname, 'r') as settings_file:
                self._settings = jsonlib.load(settings_file)
        else:
            self._settings = {}

        # prepare for all data
        self._dataset_dir = dataset_dir
        self._users_fname = os.path.join(dataset_dir, users_file)
        self._users_with_locations_fname = os.path.join(
            dataset_dir,
            'users.home-locations.' + default_location_source + '.tsv.gz')
        self._mention_network_fname = os.path.join(dataset_dir,
                                                   'mention_network.elist')
        self._bi_mention_network_fname = os.path.join(
            dataset_dir, 'bi_mention_network.elist')

        # A `set()` default in the signature is evaluated once and would be
        # shared by every instance built without this argument, so create a
        # fresh set per instance instead.
        if excluded_users is None:
            excluded_users = set()
        self.excluded_users = excluded_users