Example #1
def _update_dict_for_user(user, dict_for_user):
    """
    Write the dict into user's specific file
    :param user:
    :type user:
    :return:
    :rtype:
    """
    if FILE_MODE:
        dict_to_dump = {user: dict_for_user}

        filename = _get_file_name(user)
        with open(filename, 'wb') as outp:
            json.dump(Serializable.dumps(dict_to_dump), outp)

    else:
        filename = os.path.join(os.getcwd(), 'results', WIKINAME,
                                'user_graph.json')
        with open(filename, 'rb') as inp:
            user_dict = Serializable.loads(json.load(inp))

        user_dict[user] = dict_for_user

        with open(filename, 'wb') as outp:
            json.dump(Serializable.dumps(user_dict), outp)
Example #2
def _get_dict_for_user(user):
    """
    Get the file containing revision entries for the user "user"
    If the file is present, load its json which is effectively a dict

    Otherwise create the file
    :param user:
    :type user:
    :return:
    :rtype:
    """
    if FILE_MODE:
        filename = _get_file_name(user)
        if os.path.isfile(filename):
            with open(filename, 'rb') as inp:
                user_dict = Serializable.loads(json.load(inp))
        else:
            user_dict = {user: {}}

    else:
        filename = os.path.join(os.getcwd(), 'results', WIKINAME,
                                'user_graph.json')
        with open(filename, 'rb') as inp:
            user_dict = Serializable.loads(json.load(inp))

        if user not in user_dict:
            user_dict[user] = {}

    return user_dict[user]
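Together with _update_dict_for_user in Example #1, this gives a per-user read-modify-write cycle. A small usage sketch; the username and revision entry here are made up:

# Hypothetical read-modify-write cycle using the two helpers above.
user = 'SomeEditor'                           # made-up username
user_dict = _get_dict_for_user(user)          # {} for a brand-new user
user_dict['rev_12345'] = {'quality': 0.7}     # made-up revision entry
_update_dict_for_user(user, user_dict)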
Example #3
 def dumps(self):
     d = dict(
         in_features=self.in_features,
         out_features=self.out_features,
         weight=self.weight.data.cpu().numpy(),
         bias=None if self.bias is None else self.bias.data.cpu().numpy(),
     )
     return Serializable.dumps(d)
Example #4
 def loads(s, device):
     d = Serializable.loads(s)
     m = StdNet(d['args'], empty=True)
     for i, ms in enumerate(d['layers']):
         l = LinearWithSensitivity.loads(ms, device)
         m.layers.append(l)
         m.add_module('layer_%d' % i, l)
     return m
Example #5
 def loads(s, device):
     d = Serializable.loads(s)
     m = LinearExtended(d['in_features'],
                        d['out_features'],
                        bias=d['bias'] is not None)
     m.weight.data = torch.from_numpy(d['weight']).to(device)
     if d['bias'] is not None:
         m.bias.data = torch.from_numpy(d['bias']).to(device)
     return m
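The dumps in Example #3 writes exactly the keys that this loads reads back, so the two are plausibly halves of the same round trip: weights leave as numpy arrays inside a dict and come back as torch tensors on the requested device. A small sketch of that round trip, assuming LinearExtended behaves like torch.nn.Linear and that loads is a staticmethod (as the cls.loads call in Example #8 suggests):

import torch
# Round-trip sketch: serialize a layer to a string, then rebuild it on a device.
layer = LinearExtended(4, 2, bias=True)             # signature inferred from the loads above
s = layer.dumps()                                   # JSON string, as in Example #3
restored = LinearExtended.loads(s, torch.device('cpu'))
# If LinearExtended behaves like torch.nn.Linear, both layers agree on any input:
x = torch.randn(1, 4)
assert torch.allclose(layer(x), restored(x))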
Example #6
 def loads(s, device):
     d = Serializable.loads(s)
     args = Storage(d['args'])
     m = RBFNet(args, empty=True)
     for i, ms in enumerate(d['layers']):
         l = RBFI.loads(ms, device)
         m.layers.append(l)
         m.add_module('layer_%d' % i, l)
     return m
Example #7
 def loads(s, device):
     d = Serializable.loads(s)
     args = dict(n_classes=10)
     args.update(d['args'])
     args = Storage(args)
     m = MWDNet(args, empty=True)
     for i, ms in enumerate(d['layers']):
         l = MWD.loads(ms, device)
         m.layers.append(l)
         m.add_module('layer_%d' % i, l)
     return m
Example #8
def read_model(in_fn, device):
    with open(in_fn, "r") as f:
        d = Serializable.from_json(f.read(), device=device)
        if 'encoding' in d:
            # New style model.
            model = d['model']
            model = model.to(device)
        else:
            # Old style model
            model_kind = d['kind']
            model_file, model_cls = OLD_NET_IDS.get(model_kind, None)
            module = importlib.import_module(model_file)
            cls = getattr(module, model_cls)
            model = cls.loads(d['model'], device)
    return model
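The old-style branch looks the stored kind up in OLD_NET_IDS and imports the matching class dynamically. OLD_NET_IDS itself is not shown in these examples; from the way read_model unpacks it, it should map kind strings to (module name, class name) pairs, roughly like this hypothetical sketch (the keys and module names are made up):

# Hypothetical layout of OLD_NET_IDS, inferred from read_model above.
OLD_NET_IDS = {
    'rbfnet': ('rbf_nets', 'RBFNet'),   # module to import, class exposing loads(s, device)
    'stdnet': ('std_nets', 'StdNet'),
    'mwdnet': ('mwd_nets', 'MWDNet'),
}

The new-style branch is the counterpart of write_model in Example #14, which stores the model under an 'encoding' key; together they give a simple round trip (the path below is made up):

import torch
# Save a model with write_model, then reload it on CPU with read_model.
write_model(model, 'results/model.json')
restored = read_model('results/model.json', torch.device('cpu'))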
Example #9
 def loads(s, device):
     """Reads itself from string s."""
     d = Serializable.loads(s)
     m = RBFI(d['in_features'],
              d['out_features'],
              andor=d['andor'],
              modinf=d['modinf'],
              regular_deriv=d['regular_deriv'],
              min_input=d['min_input'],
              max_input=d['max_input'],
              min_slope=d['min_slope'],
              max_slope=d['max_slope'])
     m.u.data = torch.from_numpy(d['u']).to(device)
     m.w.data = torch.from_numpy(d['w']).to(device)
     m.andor01.data = torch.from_numpy(d['andor01']).to(device)
     return m
Example #10
 def dumps(self):
     """Writes itself to a string."""
     # Creates a dictionary
     d = dict(
         in_features=self.in_features,
         out_features=self.out_features,
         min_input=self.w.lower_bound,
         max_input=self.w.upper_bound,
         min_slope=self.u.lower_bound,
         max_slope=self.u.upper_bound,
         modinf=self.modinf,
         regular_deriv=self.regular_deriv,
         andor=self.andor,
         andor01=self.andor01.cpu().numpy(),
         u=self.u.data.cpu().numpy(),
         w=self.w.data.cpu().numpy(),
     )
     return Serializable.dumps(d)
Example #11
 def dumps(self):
     d = dict(args=self.args.__dict__,
              layers=[l.dumps() for l in self.layers])
     return Serializable.dumps(d)
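Examples #4, #6, #7 and the dumps above are four views of one pattern: dumps serializes the constructor args together with a list of per-layer dumps, and loads rebuilds an empty network and re-attaches each restored layer under the same module names. A minimal sketch of that pattern, assuming LinearExtended from Examples #3/#5 is importable; TinyNet is an illustrative name, and argparse.Namespace stands in for the codebase's Storage wrapper:

import argparse
import torch.nn as nn
from json_plus import Serializable

class TinyNet(nn.Module):
    """Illustrative network following the dumps/loads pattern of the examples."""

    def __init__(self, args, empty=False):
        super(TinyNet, self).__init__()
        self.args = args            # object with attribute access and a __dict__
        self.layers = []
        if not empty:
            sizes = args.sizes      # e.g. [784, 128, 10]
            for i, (n_in, n_out) in enumerate(zip(sizes[:-1], sizes[1:])):
                l = LinearExtended(n_in, n_out)   # any layer with dumps()/loads()
                self.layers.append(l)
                self.add_module('layer_%d' % i, l)

    def dumps(self):
        # Constructor args plus each layer's own dump, as in the dumps above.
        d = dict(args=self.args.__dict__,
                 layers=[l.dumps() for l in self.layers])
        return Serializable.dumps(d)

    @staticmethod
    def loads(s, device):
        # Rebuild an empty net, then re-attach each layer, as in Examples #4/#6/#7.
        d = Serializable.loads(s)
        m = TinyNet(argparse.Namespace(**d['args']), empty=True)
        for i, ms in enumerate(d['layers']):
            l = LinearExtended.loads(ms, device)
            m.layers.append(l)
            m.add_module('layer_%d' % i, l)
        return m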
Example #12
    #     with open(user_contrib_file, 'rb+') as inp:
    #         user_contribs = json.load(inp)
    # else:
    #     user_contribs = {}

    # get_files(base_dir, user_graph, user_contribs)

    # get_split_files(base_dir)
    if FILE_MODE:
        get_files(base_dir)

    else:
        user_graph = get_user_dict(base_dir)
        filename = os.path.join(os.getcwd(), 'results', WIKINAME,
                                'user_graph.json')
        with open(filename, 'wb') as outp:
            outp.write(Serializable.dumps(user_graph))
    # with open(user_graph_file, 'wb+') as output:
    #     json.dump(user_graph, output)
    #
    # with open(user_contrib_file, 'wb+') as output:
    #     json.dump(user_contribs, output)
    #
    # # pprint(user_graph)
    # print "Users in graph", len(user_graph.keys())
    print "None counter", NONECTR

    # for i in random.sample(user_graph.keys(),10):
    #     pprint(user_graph[i])
    #     print "--------"
Example #13
def train_nn_using_k_lstm_bit(train_dict,
                              k=None,
                              N=1000,
                              quality=True,
                              fix_bit_val=None,
                              store=False,
                              picklefile=os.path.join(os.getcwd(), 'results',
                                                      'temp_model.pkl'),
                              weighted_learning=False,
                              balanced=True):
    """
    Train the LSTM and NNet combination using training dict.

    :param train_dict: dict containing entries of revisions per user
    :param k: Number of bits to be used from LSTM
    :param N: Number of iterations for network to train. Default is 1000
    :param quality: Boolean to control if working on quality, otherwise existence
    :param fix_bit_val: Fixed value of bit to be used if only that bit should be passed to NN
    :param store: Boolean to decide whether to store result in pickle
    :param picklefile: Pickle filename
    :param weighted_learning: Boolean to control whether learning is weighted or not
    :param balanced: Boolean to control whether results should be balanced before use or not
    :rtype: tuple
    :return: Returns a tuple consisting of lstm and neural net (lstm, nnet)
    """
    train_items = train_dict.items()
    if balanced:
        train_items = _rebalance_data(train_items)

    # Send for training using k as no. of bits to use
    print "\n==Starting training== (Using %r iterations) and k=%r" % (N, k)
    print "Statuses-- Weighted: %r, Balanced %r" % (weighted_learning,
                                                    balanced)
    t_start = time.clock()
    (lstm_out, nn_out), errors = _train_nn_with_k_lstm_bits(
        train_items,
        k=k,
        N=N,
        fix_bit_val=fix_bit_val,
        weighted_learning=weighted_learning,
        quality=quality)
    print "Training completed in %r seconds" % (time.clock() - t_start)

    # Store the trained model into a pickle if store is True
    if store:
        file_basic_name = 'trained_lstm_%r_nn_%r_%r' % (
            k, N, "weighted" if weighted_learning else "unweighted")
        serialize_file_lstm = os.path.join(os.getcwd(), 'results',
                                           file_basic_name + 'lstm.json')
        serialize_file_nn = os.path.join(os.getcwd(), 'results',
                                         file_basic_name + 'nn.json')
        from json_plus import Serializable
        ser_result_lstm = Serializable.dumps(lstm_out)
        ser_result_nn = Serializable.dumps(nn_out)

        with open(serialize_file_lstm, 'wb') as output:
            json.dump(ser_result_lstm, output)
        with open(serialize_file_nn, 'wb') as output:
            json.dump(ser_result_nn, output)

        # Store the (lstm, nnet) type result into a pickle
        with open(picklefile, 'wb') as output:
            pickle.dump((lstm_out, nn_out), output, pickle.HIGHEST_PROTOCOL)

    return (lstm_out, nn_out)
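A call to the function above could look like the sketch below; train_dict is a made-up stand-in for the per-user revision dict produced by the other helpers, and only a few keyword arguments are overridden:

# Hypothetical invocation; train_dict would come from the wiki preprocessing
# helpers (e.g. _get_dict_for_user) shown in the other examples.
lstm, nnet = train_nn_using_k_lstm_bit(train_dict,
                                       k=4,
                                       N=500,
                                       quality=True,
                                       store=True,
                                       weighted_learning=False,
                                       balanced=True)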
Example #14
def write_model(m, out_fn):
    with open(out_fn, 'w') as f:
        d = dict(encoding=0, model=m)
        f.write(Serializable.dumps(d))


def _f1(prec, rec):
    return (2.0 * prec * rec) / (1.0 * (prec + rec))
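    # _f1 is the harmonic mean of precision and recall; e.g. precision 0.8 and
    # recall 0.5 give 2 * 0.8 * 0.5 / (0.8 + 0.5) ~= 0.615, so
    #   abs(_f1(0.8, 0.5) - 0.615384615) < 1e-6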


if __name__ == "__main__":
    # Get the data for concerned wiki

    graph_file = os.path.join(os.getcwd(), 'results', WIKINAME,
                              'reduced_user_graph.json')

    with open(graph_file, 'rb') as inp:
        wikidata = Serializable.load(inp)
        # wikidata = {k:wikidata[k] for k in random.sample(wikidata.keys(), 10000)}
        print len(wikidata)

    BREADTH = 1

    # contrib_file = os.path.join(os.getcwd(), 'results', 'user_contrib_test.json')
    #
    # with open(contrib_file, 'rb') as inp:
    #     user_contribs = json.load(inp)

    # Build labels

    # build_labels_for_user_quitting(user_contribs=user_contribs)
    # build_labels_for_revision_quality(wikidata=wikidata)
def _create_label_file(labels, name_suffix):
    filename = os.path.join(os.getcwd(), 'results', WIKINAME,
                            'labels_%s.json' % (name_suffix))
    with open(filename, 'wb') as outp:
        outp.write(Serializable.dumps(labels))


if __name__ == "__main__":

    WIKINAME = 'astwiki'
    NUMBER_OF_INSTANCES = 50000

    BREADTH = 15
    DEPTH = 1
    # results_file = os.path.join(os.getcwd(), 'results', WIKINAME, 'results_breadth_%d_depth_%d.json' % (BREADTH, DEPTH))
    results_file = os.path.join(os.getcwd(), 'results', WIKINAME,
                                'results_breadth_%d_depth_%d_instances_%d.json' % (BREADTH, DEPTH, NUMBER_OF_INSTANCES))

    with open(results_file, 'rb') as inp:
        r1 = Serializable.loads(inp.read())

    BREADTH = 3
    DEPTH = 1
    results_file = os.path.join(os.getcwd(), 'results', WIKINAME,
                                'results_breadth_%d_depth_%d_instances_%d.json' % (BREADTH, DEPTH, NUMBER_OF_INSTANCES))

    # results_file = os.path.join(os.getcwd(), 'results', WIKINAME, 'results_breadth_%d_depth_%d.json' % (BREADTH, DEPTH))
    with open(results_file, 'rb') as inp:
        r2 = Serializable.loads(inp.read())
    #
    # f1_label1_d1 = r1['f1']['0']
    # f1_label1_d2 = r2['f1']['0']

    f1_label1_d1 = r1['avg_rec']
    f1_label1_d2 = r2['avg_rec']