def _update_dict_for_user(user, dict_for_user):
    """
    Write the dict of revision entries for the given user to disk.

    :param user: username whose revision dict is being written
    :param dict_for_user: dict of revision entries for the user
    """
    if FILE_MODE:
        dict_to_dump = {user: dict_for_user}
        filename = _get_file_name(user)
        with open(filename, 'wb') as outp:
            json.dump(Serializable.dumps(dict_to_dump), outp)
    else:
        filename = os.path.join(os.getcwd(), 'results', WIKINAME, 'user_graph.json')
        with open(filename, 'rb') as inp:
            user_dict = Serializable.loads(json.load(inp))
        user_dict[user] = dict_for_user
        with open(filename, 'wb') as outp:
            json.dump(Serializable.dumps(user_dict), outp)

def _get_dict_for_user(user):
    """
    Get the revision entries for the user "user".

    In FILE_MODE, load the user's own file if it exists (its JSON is
    effectively a dict); otherwise start from an empty dict for the user.
    Otherwise, load the shared user_graph.json and add the user if missing.

    :param user: username whose revision dict is requested
    :return: dict of revision entries for the user
    """
    if FILE_MODE:
        filename = _get_file_name(user)
        if os.path.isfile(filename):
            with open(filename, 'rb') as inp:
                user_dict = Serializable.loads(json.load(inp))
        else:
            user_dict = {user: {}}
    else:
        filename = os.path.join(os.getcwd(), 'results', WIKINAME, 'user_graph.json')
        with open(filename, 'rb') as inp:
            user_dict = Serializable.loads(json.load(inp))
        if user not in user_dict:
            user_dict[user] = {}
    return user_dict[user]

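# Illustrative sketch, not part of the original pipeline: a typical
# read-modify-write cycle using the two helpers above. The username and the
# revision entry are hypothetical placeholders; FILE_MODE, WIKINAME, and the
# results directory are assumed to be configured as elsewhere in this module.
def _example_user_dict_roundtrip():
    user = 'ExampleEditor'                       # hypothetical username
    user_revisions = _get_dict_for_user(user)    # load (or start) the user's dict
    user_revisions['12345'] = {'quality': 0.8}   # hypothetical revision entry
    _update_dict_for_user(user, user_revisions)  # write the modified dict back
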
def dumps(self):
    """Writes itself to a string."""
    d = dict(
        in_features=self.in_features,
        out_features=self.out_features,
        weight=self.weight.data.cpu().numpy(),
        bias=None if self.bias is None else self.bias.data.cpu().numpy(),
    )
    return Serializable.dumps(d)

def loads(s, device):
    """Reads itself from string s, rebuilding the layers on the given device."""
    d = Serializable.loads(s)
    m = StdNet(d['args'], empty=True)
    for i, ms in enumerate(d['layers']):
        l = LinearWithSensitivity.loads(ms, device)
        m.layers.append(l)
        m.add_module('layer_%d' % i, l)
    return m

def loads(s, device):
    """Reads itself from string s onto the given device."""
    d = Serializable.loads(s)
    m = LinearExtended(d['in_features'], d['out_features'],
                       bias=d['bias'] is not None)
    m.weight.data = torch.from_numpy(d['weight']).to(device)
    if d['bias'] is not None:
        m.bias.data = torch.from_numpy(d['bias']).to(device)
    return m

def loads(s, device):
    """Reads itself from string s, rebuilding the RBFI layers on the given device."""
    d = Serializable.loads(s)
    args = Storage(d['args'])
    m = RBFNet(args, empty=True)
    for i, ms in enumerate(d['layers']):
        l = RBFI.loads(ms, device)
        m.layers.append(l)
        m.add_module('layer_%d' % i, l)
    return m

def loads(s, device):
    """Reads itself from string s, rebuilding the MWD layers on the given device."""
    d = Serializable.loads(s)
    args = dict(n_classes=10)
    args.update(d['args'])
    args = Storage(args)
    m = MWDNet(args, empty=True)
    for i, ms in enumerate(d['layers']):
        l = MWD.loads(ms, device)
        m.layers.append(l)
        m.add_module('layer_%d' % i, l)
    return m

def read_model(in_fn, device):
    with open(in_fn, "r") as f:
        d = Serializable.from_json(f.read(), device=device)
    if 'encoding' in d:
        # New style model.
        model = d['model']
        model = model.to(device)
    else:
        # Old style model.
        model_kind = d['kind']
        model_file, model_cls = OLD_NET_IDS.get(model_kind, None)
        module = importlib.import_module(model_file)
        cls = getattr(module, model_cls)
        model = cls.loads(d['model'], device)
    return model

def loads(s, device):
    """Reads itself from string s."""
    d = Serializable.loads(s)
    m = RBFI(d['in_features'], d['out_features'], andor=d['andor'],
             modinf=d['modinf'], regular_deriv=d['regular_deriv'],
             min_input=d['min_input'], max_input=d['max_input'],
             min_slope=d['min_slope'], max_slope=d['max_slope'])
    m.u.data = torch.from_numpy(d['u']).to(device)
    m.w.data = torch.from_numpy(d['w']).to(device)
    m.andor01.data = torch.from_numpy(d['andor01']).to(device)
    return m

def dumps(self):
    """Writes itself to a string."""
    # Creates a dictionary of the configuration and parameters.
    d = dict(
        in_features=self.in_features,
        out_features=self.out_features,
        min_input=self.w.lower_bound,
        max_input=self.w.upper_bound,
        min_slope=self.u.lower_bound,
        max_slope=self.u.upper_bound,
        modinf=self.modinf,
        regular_deriv=self.regular_deriv,
        andor=self.andor,
        andor01=self.andor01.cpu().numpy(),
        u=self.u.data.cpu().numpy(),
        w=self.w.data.cpu().numpy(),
    )
    return Serializable.dumps(d)

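# Illustrative sketch, not part of the original class: round-tripping an RBFI
# unit through dumps()/loads(). The layer sizes are arbitrary, and this assumes
# the RBFI constructor supplies defaults for andor, modinf, regular_deriv, and
# the input/slope bounds; pass them explicitly if it does not.
#
#   layer = RBFI(16, 4)               # hypothetical 16-in, 4-out unit
#   s = layer.dumps()                 # parameters and bounds as a JSON string
#   restored = RBFI.loads(s, device)  # rebuilt layer with the same weights
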
def dumps(self):
    d = dict(args=self.args.__dict__,
             layers=[l.dumps() for l in self.layers])
    return Serializable.dumps(d)

# with open(user_contrib_file, 'rb+') as inp:
#     user_contribs = json.load(inp)
# else:
#     user_contribs = {}
# get_files(base_dir, user_graph, user_contribs)
# get_split_files(base_dir)

if FILE_MODE:
    get_files(base_dir)
else:
    user_graph = get_user_dict(base_dir)
    filename = os.path.join(os.getcwd(), 'results', WIKINAME, 'user_graph.json')
    with open(filename, 'wb') as outp:
        outp.write(Serializable.dumps(user_graph))

# with open(user_graph_file, 'wb+') as output:
#     json.dump(user_graph, output)
#
# with open(user_contrib_file, 'wb+') as output:
#     json.dump(user_contribs, output)

# pprint(user_graph)
# print "Users in graph", len(user_graph.keys())
print "None counter", NONECTR
# for i in random.sample(user_graph.keys(), 10):
#     pprint(user_graph[i])
#     print "--------"

def train_nn_using_k_lstm_bit(train_dict, k=None, N=1000, quality=True,
                              fix_bit_val=None, store=False,
                              picklefile=os.path.join(os.getcwd(), 'results', 'temp_model.pkl'),
                              weighted_learning=False, balanced=True):
    """
    Train the LSTM and NNet combination using the training dict.

    :param train_dict: dict containing entries of revisions per user
    :param k: number of bits to be used from the LSTM
    :param N: number of iterations for the network to train; default is 1000
    :param quality: boolean to control if working on quality, otherwise existence
    :param fix_bit_val: fixed bit value to use if only that bit should be passed to the NN
    :param store: boolean to decide whether to store the result in a pickle
    :param picklefile: pickle filename
    :param weighted_learning: boolean to control whether learning is weighted or not
    :param balanced: boolean to control whether results should be balanced before use or not
    :rtype: tuple
    :return: a tuple consisting of the LSTM and the neural net (lstm, nnet)
    """
    train_items = train_dict.items()
    if balanced:
        train_items = _rebalance_data(train_items)

    # Send for training, using k as the number of bits to use.
    print "\n==Starting training== (Using %r iterations) and k=%r" % (N, k)
    print "Statuses-- Weighted: %r, Balanced %r" % (weighted_learning, balanced)
    t_start = time.clock()
    (lstm_out, nn_out), errors = _train_nn_with_k_lstm_bits(
        train_items, k=k, N=N, fix_bit_val=fix_bit_val,
        weighted_learning=weighted_learning, quality=quality)
    print "Training completed in %r seconds" % (time.clock() - t_start)

    # Store the trained model if store is True.
    if store:
        file_basic_name = 'trained_lstm_%r_nn_%r_%r' % (
            k, N, "weighted" if weighted_learning else "unweighted")
        serialize_file_lstm = os.path.join(os.getcwd(), 'results', file_basic_name + 'lstm.json')
        serialize_file_nn = os.path.join(os.getcwd(), 'results', file_basic_name + 'nn.json')
        from json_plus import Serializable
        ser_result_lstm = Serializable.dumps(lstm_out)
        ser_result_nn = Serializable.dumps(nn_out)
        with open(serialize_file_lstm, 'wb') as output:
            json.dump(ser_result_lstm, output)
        with open(serialize_file_nn, 'wb') as output:
            json.dump(ser_result_nn, output)
        # Store the (lstm, nnet) result into a pickle as well.
        with open(picklefile, 'wb') as output:
            pickle.dump((lstm_out, nn_out), output, pickle.HIGHEST_PROTOCOL)

    return (lstm_out, nn_out)

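# Illustrative sketch, not part of the original script: a typical invocation of
# train_nn_using_k_lstm_bit. The training dict would normally come from the
# per-wiki graph files built elsewhere in this project; the parameter values
# below are hypothetical.
def _example_training_run(train_dict):
    lstm, nnet = train_nn_using_k_lstm_bit(
        train_dict,
        k=4,                      # hypothetical: use 4 LSTM bits
        N=500,                    # hypothetical: 500 training iterations
        quality=True,             # train on revision quality
        store=False,              # do not persist this run
        weighted_learning=False,
        balanced=True)
    return lstm, nnet
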
def write_model(m, out_fn):
    with open(out_fn, 'w') as f:
        d = dict(encoding=0, model=m)
        f.write(Serializable.dumps(d))

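# Illustrative sketch, not part of the original module, assuming read_model and
# write_model live in the same file: saving a model and restoring it on a given
# torch device. The output path is a hypothetical placeholder.
def _example_model_roundtrip(model, device, out_fn='/tmp/example_model.json'):
    write_model(model, out_fn)             # serialize the model to JSON on disk
    restored = read_model(out_fn, device)  # rebuild it on the requested device
    return restored
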
        for item in graph_item_list:
            self._compute_features_for_lstm(item)


def _f1(prec, rec):
    return (2.0 * prec * rec) / (1.0 * (prec + rec))


if __name__ == "__main__":
    # Get the data for the concerned wiki.
    graph_file = os.path.join(os.getcwd(), 'results', WIKINAME, 'reduced_user_graph.json')
    with open(graph_file, 'rb') as inp:
        wikidata = Serializable.load(inp)
    # wikidata = {k: wikidata[k] for k in random.sample(wikidata.keys(), 10000)}
    print len(wikidata)

    BREADTH = 1

    # contrib_file = os.path.join(os.getcwd(), 'results', 'user_contrib_test.json')
    # with open(contrib_file, 'rb') as inp:
    #     user_contribs = json.load(inp)

    # Build labels
    # build_labels_for_user_quitting(user_contribs=user_contribs)
    # build_labels_for_revision_quality(wikidata=wikidata)

def _create_label_file(labels, name_suffix):
    filename = os.path.join(os.getcwd(), 'results', WIKINAME, 'labels_%s.json' % name_suffix)
    with open(filename, 'wb') as outp:
        outp.write(Serializable.dumps(labels))

    p_value, z_left_tail = statistical_significance(a, b, level=level)


if __name__ == "__main__":
    WIKINAME = 'astwiki'
    NUMBER_OF_INSTANCES = 50000
    BREADTH = 15
    DEPTH = 1

    # results_file = os.path.join(os.getcwd(), 'results', WIKINAME,
    #                             'results_breadth_%d_depth_%d.json' % (BREADTH, DEPTH))
    results_file = os.path.join(os.getcwd(), 'results', WIKINAME,
                                'results_breadth_%d_depth_%d_instances_%d.json' % (BREADTH, DEPTH, NUMBER_OF_INSTANCES))
    with open(results_file, 'rb') as inp:
        r1 = Serializable.loads(inp.read())

    BREADTH = 3
    DEPTH = 1
    results_file = os.path.join(os.getcwd(), 'results', WIKINAME,
                                'results_breadth_%d_depth_%d_instances_%d.json' % (BREADTH, DEPTH, NUMBER_OF_INSTANCES))
    # results_file = os.path.join(os.getcwd(), 'results', WIKINAME,
    #                             'results_breadth_%d_depth_%d.json' % (BREADTH, DEPTH))
    with open(results_file, 'rb') as inp:
        r2 = Serializable.loads(inp.read())

    # f1_label1_d1 = r1['f1']['0']
    # f1_label1_d2 = r2['f1']['0']
    f1_label1_d1 = r1['avg_rec']
    f1_label1_d2 = r2['avg_rec']