コード例 #1
0
 def _weight_feature(self, msg):
     '''
     Compute the weighted feature score of a message.

     Runs Feature.extract() on msg, then sums msg.feature[name] * weight
     over every name in self.feature_name that is present in msg.feature.
     The weight of a name is the entry of self.w at the same position.
     '''
     Feature.extract(msg)
     total = 0.0
     for idx, name in enumerate(self.feature_name):
         # Looked up before the membership test, so a weight vector shorter
         # than the name list still raises instead of being ignored.
         weight = self.w[idx]
         if name not in msg.feature:
             continue
         total += msg.feature[name] * weight
     return total
コード例 #2
0
ファイル: autoweight.py プロジェクト: Kelvin-Zhong/sns-router
 def _weight_feature(self, msg):
     '''
     Compute the weighted feature score of a message.

     Runs Feature.extract() on msg, then sums msg.feature[name] * weight
     over every name in self.feature_name that is present in msg.feature.
     The weight of a name is the entry of self.w at the same position.
     '''
     Feature.extract(msg)
     total = 0.0
     for idx, name in enumerate(self.feature_name):
         # Looked up before the membership test, so a weight vector shorter
         # than the name list still raises instead of being ignored.
         weight = self.w[idx]
         if name not in msg.feature:
             continue
         total += msg.feature[name] * weight
     return total
コード例 #3
0
def run_test_case():
    '''
    Print the extracted features of every message in the test case file.

    Loads the message list from 'analysis/case.pickle' via Serialize.loads,
    runs Feature.extract() on each message and prints the message followed
    by one "name: value" line per feature, sorted by feature name.
    '''
    # Read inside a with-block so the file handle is closed right away
    # instead of being left to the garbage collector.
    with open('analysis/case.pickle') as fp:
        raw = fp.read()
    message_list = Serialize.loads(raw)
    for m in message_list:
        Feature.extract(m)
        # Single-argument print(...) gives the same output under the
        # Python 2 print statement.
        print("====")
        print(m)
        print("----")
        for f in sorted(m.feature):
            print("%s: %.5f" % (f, m.feature[f]))
        print("====")
コード例 #4
0
def run_test_case():
    '''
    Print the extracted features of every message in the test case file.

    Loads the message list from 'analysis/case.pickle' via Serialize.loads,
    runs Feature.extract() on each message and prints the message followed
    by one "name: value" line per feature, sorted by feature name.
    '''
    # Read inside a with-block so the file handle is closed right away
    # instead of being left to the garbage collector.
    with open('analysis/case.pickle') as fp:
        raw = fp.read()
    message_list = Serialize.loads(raw)
    for m in message_list:
        Feature.extract(m)
        # Single-argument print(...) gives the same output under the
        # Python 2 print statement.
        print("====")
        print(m)
        print("----")
        for f in sorted(m.feature):
            print("%s: %.5f" % (f, m.feature[f]))
        print("====")
コード例 #5
0
    def msg2X(self, samples):
        '''
        Build the data matrix for a collection of messages.

        Every message in samples.values() is passed through
        Feature.extract() and turned into a list of its feature values,
        ordered like self.feature_name.

        Returns X: a dict mapping msg_id to that list. See explanation
        of _G()
        '''
        matrix = {}
        for msg in samples.values():
            Feature.extract(msg)
            matrix[msg.msg_id] = [msg.feature[n] for n in self.feature_name]
        return matrix
コード例 #6
0
ファイル: autoweight.py プロジェクト: Kelvin-Zhong/sns-router
    def msg2X(self, samples):
        '''
        Build the data matrix for a collection of messages.

        Every message in samples.values() is passed through
        Feature.extract() and turned into a list of its feature values,
        ordered like self.feature_name.

        Returns X: a dict mapping msg_id to that list. See explanation
        of _G()
        '''
        matrix = {}
        for msg in samples.values():
            Feature.extract(msg)
            matrix[msg.msg_id] = [msg.feature[n] for n in self.feature_name]
        return matrix
コード例 #7
0
def extract_all():
    '''
    Re-extract features for every message in the workspace and save it.

    Loads 'analysis/workspace.pickle' via Serialize.loads, runs
    Feature.extract() on each entry of message['message_list'], then
    writes the output of Serialize.dumps back to the same file. The
    elapsed time of each of the three phases is printed.
    '''
    import time

    begin = time.time()
    with open('analysis/workspace.pickle') as fp:
        raw = fp.read()
    message = Serialize.loads(raw)
    end = time.time()
    print("Load finish. Time elapsed: %.3f" % (end - begin))

    begin = time.time()
    for m in message['message_list']:
        Feature.extract(m)
    end = time.time()
    print("Feature extraction finish. Time elapsed: %.3f" % (end - begin))

    begin = time.time()
    # Serialize BEFORE opening the file: mode 'w' truncates on open, so a
    # failure inside dumps() would otherwise leave the workspace empty.
    dumped = Serialize.dumps(message)
    with open('analysis/workspace.pickle', 'w') as fp:
        fp.write(dumped)
    end = time.time()
    print("Dump finish. Time elapsed: %.3f" % (end - begin))
コード例 #8
0
def extract_all():
    '''
    Re-extract features for every message in the workspace and save it.

    Loads 'analysis/workspace.pickle' via Serialize.loads, runs
    Feature.extract() on each entry of message['message_list'], then
    writes the output of Serialize.dumps back to the same file. The
    elapsed time of each of the three phases is printed.
    '''
    import time

    begin = time.time()
    with open('analysis/workspace.pickle') as fp:
        raw = fp.read()
    message = Serialize.loads(raw)
    end = time.time()
    print("Load finish. Time elapsed: %.3f" % (end - begin))

    begin = time.time()
    for m in message['message_list']:
        Feature.extract(m)
    end = time.time()
    print("Feature extraction finish. Time elapsed: %.3f" % (end - begin))

    begin = time.time()
    # Serialize BEFORE opening the file: mode 'w' truncates on open, so a
    # failure inside dumps() would otherwise leave the workspace empty.
    dumped = Serialize.dumps(message)
    with open('analysis/workspace.pickle', 'w') as fp:
        fp.write(dumped)
    end = time.time()
    print("Dump finish. Time elapsed: %.3f" % (end - begin))
コード例 #9
0
ファイル: autoweight.py プロジェクト: Kelvin-Zhong/sns-router
 def __init__(self, samples, order, init_weight, learner):
     '''
     Store the training inputs and derive feature names, the data
     matrix (self.X) and the starting weights (self.w).

     init_weight: a mapping whose keys() become the feature names, or
         None. With None, the feature names are taken from one extracted
         sample and the starting weights come from init_weight_kendall().
         That path is deprecated. Whenever possible, please init your
         features with weight in 'weights.json'
     '''
     super(AutoWeight, self).__init__()
     self.samples = samples
     self.order = order
     self.learner = learner

     no_init = init_weight is None
     if no_init:
         # Deprecated: borrow the feature set of a single sample.
         probe = samples.values()[0]
         Feature.extract(probe)
         self.feature_name = probe.feature.keys()
     else:
         self.feature_name = init_weight.keys()

     # msg2X reads self.feature_name, so it has to run after the names
     # are known and before the weights are initialised.
     self.X = self.msg2X(samples)

     seed = (self.init_weight_kendall(self.feature_name, self.samples,
                                      self.order)
             if no_init else init_weight)
     self.w = self.initw(seed)
コード例 #10
0
 def __init__(self, samples, order, init_weight, learner):
     '''
     Store the training inputs and derive feature names, the data
     matrix (self.X) and the starting weights (self.w).

     init_weight: a mapping whose keys() become the feature names, or
         None. With None, the feature names are taken from one extracted
         sample and the starting weights come from init_weight_kendall().
         That path is deprecated. Whenever possible, please init your
         features with weight in 'weights.json'
     '''
     super(AutoWeight, self).__init__()
     self.samples = samples
     self.order = order
     self.learner = learner

     no_init = init_weight is None
     if no_init:
         # Deprecated: borrow the feature set of a single sample.
         probe = samples.values()[0]
         Feature.extract(probe)
         self.feature_name = probe.feature.keys()
     else:
         self.feature_name = init_weight.keys()

     # msg2X reads self.feature_name, so it has to run after the names
     # are known and before the weights are initialised.
     self.X = self.msg2X(samples)

     seed = (self.init_weight_kendall(self.feature_name, self.samples,
                                      self.order)
             if no_init else init_weight)
     self.w = self.initw(seed)
コード例 #11
0
ファイル: select_samples.py プロジェクト: uestcer/sns-router
def export_arff(message_list, ds_name, fn_arff):
    '''
    Export message_list to Weka's arff file

    message_list: iterable of messages. Only messages with exactly one
                  tag are written.
    ds_name: the name of data set. Shown in first line
             of arff file.
    fn_arff: path of the arff file to write.

    The tag mapping is read from the relative path 'tag_mapping.json'.
    '''

    # Read the mapping inside a with-block so the handle is closed
    # instead of being leaked.
    with open('tag_mapping.json') as fp_tags:
        all_tags = json.loads(fp_tags.read())
    # Reverse lookup: mapping value -> mapping key.
    all_tags_r = dict((v, k) for (k, v) in all_tags.iteritems())
    # NOTE(review): "null" is registered here, but the class attribute
    # below only lists all_tags.keys(). Confirm 'tag_mapping.json'
    # declares it; otherwise rows tagged 0 use an undeclared class value.
    all_tags_r[0] = "null"

    with open(fn_arff, 'w') as fp:
        fp.write("@relation %s\n\n" % (ds_name))
        fn = []

        # Write schema; fn remembers the attribute order for the data rows.
        fp.write("@attribute id numeric\n")
        for fe in Feature.feature_extractors:
            for (f, t) in fe.schema.iteritems():
                fp.write("@attribute %s %s\n" % (f, t))
                fn.append(f)
        fp.write("@attribute class {%s}\n" % (",".join(all_tags.keys())))

        # Write data
        fp.write("\n\n@data\n")
        for m in message_list:
            # Only single-tagged messages are exported; untagged and
            # multi tagged messages are ignored for simplicity.
            if len(m.tags) != 1:
                continue
            msg_id = str(m.msg_id)
            tag_name = all_tags_r[m.tags.keys()[0]]
            Feature.extract(m)
            fields = [msg_id] + [str(m.feature[f]) for f in fn] + [tag_name]
            fp.write(",".join(fields) + "\n")