Ejemplo n.º 1
0
    def run(self):
        """Write the thresholded seed users to a tab-separated text file.

        The input path is taken from ``self.pass_in`` and the output path
        from ``self.pass_out``.  The ``status_count_threshold`` and
        ``following_count_threshold`` entries of ``self.param`` are passed
        to ``dp_seeds.fromCSVThresholdedEng`` together with the input path.
        Each ``(user_id, age_group)`` pair it yields becomes one output
        line: the two values converted with ``str`` and separated by a tab.
        """
        seed_filename = self.pass_in
        output_filename = self.pass_out

        status_count_threshold = self.param['status_count_threshold']
        following_count_threshold = self.param['following_count_threshold']

        # Load the seeds before opening the output, so that a failure while
        # loading does not create or truncate the output file.
        seeds = dp_seeds.fromCSVThresholdedEng(
            seed_filename,
            status_count_threshold,
            following_count_threshold)

        # The context manager closes the file even when a write raises,
        # which the previous manual open()/close() pair did not guarantee.
        with open(output_filename, 'w') as fout:
            for user_id, age_group in seeds:
                fout.write(str(user_id) + '\t' + str(age_group) + '\n')
from dpFeature import Feature
import dpAnnotatedSeeds as dp_seeds
from dpDataLoader import loadFollowingFromDBofUser


class FollowingFeature(Feature):
    """Feature built from the result of ``loadFollowingFromDBofUser``.

    Presumably each loaded item identifies an account the user follows;
    confirm against ``dpDataLoader``.
    """

    def convertSingle(self, user_id):
        """Return ``(item, 1)`` pairs for everything loaded for ``user_id``.

        Every item gets the same constant weight of 1.  The result is
        whatever ``map`` produces for the running Python version.
        """

        def _attach_weight(followee):
            # Placeholder weighting: every entry counts equally.
            return (followee, 1)

        return map(_attach_weight, loadFollowingFromDBofUser(user_id))


if __name__ == "__main__":
    # Script entry point: load the annotated seeds with the loader's default
    # thresholds and let FollowingFeature write the generated features out.
    annotated_csv = '../data/sampled2.8-annotated.csv'
    libsvm_target = '../feature_set2/ver2.8-efollowing.libsvm'

    generator = FollowingFeature(
        dp_seeds.fromCSVThresholdedEng(annotated_csv))
    generator.generateTo(libsvm_target)