def OFSGD_update(x_t, y_t, NumFeature, w):
    # One OFS (online feature selection) gradient step. Assumes numpy's
    # dot/sqrt/linalg plus module-level eta (step size) and lbda
    # (regularization) are in scope.
    f_t = dot(w, x_t)
    if y_t * f_t <= 1:  # hinge-loss margin violated: update the weights
        delt = eta * y_t * x_t
        w = w + delt
        # w = dot(w, min(1, 1 / (sqrt(lbda) * linalg.norm(w))))
        w = w * min(1, 1 / (sqrt(lbda) * linalg.norm(w)))  # project onto the L2 ball
        truncate(w, NumFeature)
    return w, f_t
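# OFSGD_update above calls a truncate(w, NumFeature) helper that is not shown.
# A minimal in-place sketch, assuming (the source does not confirm this) that
# it keeps only the NumFeature largest-magnitude weights and zeroes the rest:
import numpy as np

def truncate(w, num_feature):
    # Hypothetical helper: zero all but the num_feature largest-magnitude
    # entries of w, operating in place through a flat view.
    flat = w.reshape(-1)
    if num_feature < flat.size:
        drop = np.argpartition(np.abs(flat), -num_feature)[:-num_feature]
        flat[drop] = 0.0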
def test_input_1(self):
    # Failure message:
    # expected truncate("Cool", 2)
    # to equal "Truncation must be at least 3 characters."
    self.assertEqual(truncate("Cool", 2),
                     "Truncation must be at least 3 characters.")
def truncate_1d(x, bitsize=32):
    '''
    truncate theano variable
    @param x: theano var
    @return theano variable with truncated value
    '''
    value = x.eval()
    for num in range(value.shape[0]):
        value[num] = truncate.truncate(value[num], bitsize)
    x.set_value(value)  # write the truncated values back into the shared variable
    return x
def truncate_1n(x, bitsize=32):
    '''
    truncate numpy array
    @param x: numpy array
    @return a truncated numpy array
    '''
    # A numpy array has no eval()/assign(); truncate its elements in place.
    for num in range(x.shape[0]):
        x[num] = truncate.truncate(x[num], bitsize)
    return x
import csv

def get_sources(path):
    with open(path, "r", errors="ignore") as f:
        reader = csv.reader(f, delimiter=",")
        next(reader)  # skip the header row
        sources = []
        for line in reader:
            url = truncate(line[1])
            if url not in sources:
                sources.append(url)
    return sources
def truncate_2d(x, bitsize=32):
    '''
    truncate theano variable
    @param x: theano var
    @return theano variable with truncated value
    '''
    value = x.eval()
    for row in range(value.shape[0]):
        for col in range(value.shape[1]):
            value[row][col] = truncate.truncate(value[row][col], bitsize)
    x.set_value(value)
    return x
def truncate_2n(x, bitsize=32):
    '''
    truncate numpy array
    @param x: numpy array
    @return a truncated numpy array
    '''
    # A numpy array has no eval()/assign(); truncate its elements in place.
    for row in range(x.shape[0]):
        for col in range(x.shape[1]):
            x[row][col] = truncate.truncate(x[row][col], bitsize)
    return x
def share_truncate(share_name):
    share = samba.get_share(share_name)
    if not share:
        return flask.jsonify({"success": False, "info": "SHARENOTEXIST"})
    share_path = samba.share_real_path(share)
    if not os.path.isdir(share_path):
        return flask.jsonify({"success": False, "info": "DIRNOTEXIST"})
    rst = truncate.truncate(samba.share_real_path(share))
    # Flag the share for re-walking by creating the .walk_requested marker.
    try:
        os.symlink("/dev/null",
                   os.path.join(oscar.get_database_dir(share_path), ".walk_requested"))
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
def truncate_4d(x, bitsize=32):
    '''
    truncate theano variable
    @param x: theano var
    @return theano variable with truncated value
    '''
    value = x.eval()
    for dim1 in range(value.shape[0]):
        for dim2 in range(value.shape[1]):
            for dim3 in range(value.shape[2]):
                for dim4 in range(value.shape[3]):
                    value[dim1][dim2][dim3][dim4] = truncate.truncate(
                        value[dim1][dim2][dim3][dim4], bitsize)
    x.set_value(value)
    return x
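# The 1d/2d/4d loops above differ only in rank. A rank-agnostic sketch,
# assuming the same eval/set_value interface and an elementwise
# truncate.truncate; this helper is hypothetical, not part of the source:
import numpy as np

def truncate_nd(x, bitsize=32):
    value = x.eval()
    with np.nditer(value, op_flags=['readwrite']) as it:
        for elem in it:
            elem[...] = truncate.truncate(elem.item(), bitsize)
    x.set_value(value)
    return x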
def OFSGD_imbalance(X, Y, options, id_list):
    # OFS with a class-imbalance correction: updates on negative scores are
    # damped by 0.8. Assumes numpy's zeros/transpose/dot/sqrt/linalg in scope.
    NumFeature = options.NumFeature
    ID = id_list
    error_count = 0
    mistakes = []
    error_list = []
    eta = 0.2
    lbda = 0.01
    k = 2
    w = zeros([X.shape[1], 1])
    for t in range(len(ID)):
        id = ID[t]
        x_t = transpose(X[id - 1, :])
        x_t = x_t.reshape(len(x_t), 1)
        f_t = dot(transpose(w), x_t)
        y_t = Y[id - 1]
        if y_t * f_t <= 0:  # prediction error
            error_count += 1
            error_list.append(id)
        if y_t * f_t <= 1:  # margin violation: take a gradient step
            if f_t < 0:
                delt = 0.8 * eta * y_t * x_t
            else:
                delt = eta * y_t * x_t
            w = w + delt
            w = dot(w, min(1, 1 / (sqrt(lbda) * linalg.norm(w))))  # L2-ball projection
            truncate(w, NumFeature)
        if t % 10 == 1:
            mistakes.append(error_count / float(t))
    return mistakes
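# A minimal usage sketch for OFSGD_imbalance on synthetic data. It assumes the
# snippet's bare numpy names (zeros, dot, ...) come from `from numpy import *`
# and that a truncate helper like the one sketched earlier is in scope; the
# data and options object below are illustrative, not from the source.
from types import SimpleNamespace
from numpy import *
from numpy.random import default_rng

rng = default_rng(0)
X = rng.normal(size=(100, 20))                  # 100 samples, 20 features
Y = sign(X[:, 0] + 0.1 * rng.normal(size=100))  # labels in {-1, +1}
options = SimpleNamespace(NumFeature=5)         # keep at most 5 active weights
id_list = list(range(1, len(Y) + 1))            # ids are 1-indexed

mistakes = OFSGD_imbalance(X, Y, options, id_list)
print(mistakes[-1])                             # running mistake rate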
def test_input_6(self):
    # Failure message:
    # expected truncate("Another test", 12) to equal "Another t..."
    self.assertEqual(truncate("Another test", 12), "Another t...")
def test_input_8(self):
    # Failure message:
    # expected truncate("Woah", 3) to equal "..."
    self.assertEqual(truncate("Woah", 3), "...")
def test_input_4(self):
    # Failure message:
    # expected truncate("Hello World", 6) to equal "Hel..."
    self.assertEqual(truncate("Hello World", 6), "Hel...")
def test_input_5(self):
    # Failure message:
    # expected truncate("Problem solving is the best!", 10) to equal "Problem..."
    self.assertEqual(
        truncate("Problem solving is the best!", 10), "Problem...")
class SQL:
    def __init__(self, parent, database, sql_insert):
        # Create class variable references
        self.parent = parent
        self.database = database
        self.sql_insert = sql_insert
        self.last_table = ''
        self.last_selection = []
        if DEBUG:
            print(self.sql_insert)
        # Parse the sql insert until no more characters are left
        while not self.sql_insert == '':
            if self.sql_insert.find(' ') > -1:
                case = self.sql_insert[0:self.sql_insert.find(' ')]
            else:
                case = self.sql_insert
            case = case.upper()
            if case == 'CREATE':
                create(self)
            elif case == 'ALTER':
                alter(self)
            elif case == 'DROP':
                drop(self)
            elif case == 'INSERT':
                insert(self)
            elif case == 'SELECT':
                s = select(self)
                join_union_case = self.sql_insert[0:self.sql_insert.find(' ')].upper()
                if join_union_case == 'UNION':
                    self.sql_insert = self.sql_insert[len(case):].strip()
                    s1 = eval(s)
                    s = select(self)
                    s2 = eval(s)
                    if len(s1[0]) != len(s2[0]):
                        print('SQL: each SELECT statement within the UNION must '
                              'have the same number of columns.')
                        s = ''
                    else:
                        wrongTypes = 0
                        i = len(s1[0]) - 1
                        while i >= 0:  # iterate through the first tuple
                            if not isinstance(s1[0][i], type(s2[0][i])):
                                print('SQL: each SELECT statement within the UNION '
                                      'must have columns of the same data types in '
                                      'the same order.')
                                s = ''
                                wrongTypes = 1  # elements of the two selects differ in type
                                break
                            i -= 1
                        if not wrongTypes:
                            ret = []
                            for trple1 in s1:
                                ret.append(trple1)  # append all triples from the first set
                            for trple2 in s2:
                                found = 0
                                for addedTrple in s1:
                                    if trple2 == addedTrple:  # in both sets: don't append again
                                        found = 1
                                        break
                                if found == 0:  # not in the first set: append it
                                    ret.append(trple2)
                            x = ", ".join(map(str, ret))
                            s = '[' + x + ']'
                elif join_union_case in ('INNER', 'LEFT', 'RIGHT', 'FULL'):
                    s = joins(self)
                parent.python += s
            elif case == 'TRUNCATE':
                truncate(self)
            elif case == 'DELETE':
                delete(self)
            elif case == 'UPDATE':
                update(self)
            elif case == 'PRINT':
                self.sql_insert = self.sql_insert[len(case):].strip()
                parent.python += self.print_select(select(self))  # Non-standard SQL
            elif case == 'DATABASEPRINT':
                self.sql_insert = self.sql_insert[len(case):].strip()
                s = self.databaseprint()
                parent.python += s
            elif case == 'CONTENTS':
                self.sql_insert = self.sql_insert[len(case):].strip()
                s = select(self)
                s = '[' + s.replace('[', '').replace(']', '') + ']'
                parent.python += s
            elif case == 'TRIPLES':
                s = ", ".join(map(str, self.database['triples']))
                parent.python += s
                self.sql_insert = self.sql_insert[len(case):].strip()
            else:
                print(self.sql_insert)
                raise NameError(
                    'SQL: Statement incorrect or not yet supported: ' + case)
        if DEBUG:
            print()
def train_model():
    # Training loop for the knowledge-grounded chat model. Assumes module-level
    # torch/optim/nn, movie_data, i2w, device, batch_size, and the helpers
    # (convert_sentence_to_index, convert_knowledge, minibatch, preprocess,
    # load_model, save_model) are in scope.
    model_exist = False
    lamb = 1e-4
    prob = 0.6
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
    txt_file = open("a.txt", "w")
    start_sent = '<SOS>'
    start_index = convert_sentence_to_index(start_sent)
    # The vocabulary size is the largest index appearing in i2w.
    max_val = 0
    for key in i2w:
        temp_val = int(key)
        if max_val < temp_val:
            max_val = temp_val
    if model_exist:
        model, optimizer, epoch, loss = load_model(max_val)
        model.to(device)
        optimizer = optim.Adam(model.parameters())
    else:
        model = Model(256, max_val + 1, prob_vocab)
        model = model.to(device)
        optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss(ignore_index=0)
    count = 0
    chats_completed = 0
    for epoch in range(200):
        for data in movie_data:
            tot_loss = 0
            count = count + 1
            if count > 0:
                movie = movie_data[data]
                chats = movie.chat
                plot = movie.plot
                review = movie.review
                comments = movie.comments
                comp_know = truncate(plot, comments, review, 500)
                plot_sent_indx_arr = convert_knowledge(comp_know)
                review_sent_indx_arr = convert_knowledge(review)
                comment_sent_indx_arr = convert_knowledge(comments)
                # just the plot
                # model.knowledge.forward(plot_sent_indx)
                tot_loss = 0
                # if (len(comments) > 0 and len(review) > 0):
                encoder_in = []
                decoder_ou = []
                for chat in chats:
                    talk = chat.chat
                    if len(chat.chat) % 2 != 0 and len(chat.chat) > 2:
                        talk.pop()  # drop the odd trailing utterance
                    encoder_in = talk[0]
                    decoder_ou = talk[1]
                    # encoder_in.extend(talk[0::1])
                    # decoder_ou.extend(talk[1::2])
                for enc, dec in minibatch(encoder_in, decoder_ou):
                    # Pad a short final minibatch up to batch_size.
                    if len(enc) < batch_size:
                        diff = batch_size - len(enc)
                        for k in range(diff):
                            enc.append("<PAD>")
                            dec.append("<PAD>")
                    enc_lengths = [len(ea.split()) for ea in enc]
                    dec_lengths = [len(da.split()) for da in dec]
                    # Skip minibatches with a target longer than 40 tokens.
                    if any(lo > 40 for lo in dec_lengths):
                        continue
                    input_sent, dec_sent_index = preprocess(enc, dec)
                    enc_lengths = torch.tensor(enc_lengths).long().to(device)
                    dec_lengths = torch.tensor(dec_lengths).long().to(device)
                    input_sent = torch.tensor(input_sent).long().to(device)
                    dec_sent_index = torch.tensor(dec_sent_index).long().to(device)
                    know_hidd = model.forward_knowledge_movie(plot_sent_indx_arr)
                    isRely = True
                    start_index = torch.tensor([1]).repeat(batch_size, 1).long().to(device)
                    output, coverage, current_attention = model.forward(
                        input_sent, dec_sent_index, start_index, True, know_hidd,
                        isRely, plot_sent_indx_arr, review_sent_indx_arr,
                        comment_sent_indx_arr, enc_lengths, dec_lengths, None)
                    org_word_index = dec_sent_index.clone()
                    max_prob_index = torch.argmax(output, dim=2)
                    # Decode the argmax predictions back to text for logging.
                    batch_sentences = []
                    for b in range(max_prob_index.shape[0]):
                        sentence_str = ''
                        actual_len = dec_lengths[b]
                        for w in range(actual_len):
                            word = i2w[str((max_prob_index[b][w]).item())]
                            sentence_str += word + ' '
                        batch_sentences.append(sentence_str)
                    sent_index = 0
                    for sentence in batch_sentences:
                        if len(sentence) > 10:
                            index = enc[sent_index].rfind("<EOS>")
                            if index == -1:
                                sent = enc[sent_index]
                            else:
                                sent = enc[sent_index][index + 6:]
                            txt_file.write("Speaker 1:" + sent)
                            txt_file.write("\n")
                            txt_file.write("Model: " + sentence)
                            txt_file.write("\n")
                            txt_file.write("Speaker 2:" + dec[sent_index])
                            txt_file.write("\n")
                            txt_file.flush()
                        sent_index += 1
                    # Coverage penalty: sum over decoder steps of the elementwise
                    # minimum of the running coverage and the current attention.
                    for j in range(0, dec_sent_index.shape[1]):
                        if j == 0:
                            att_sum = torch.sum(
                                torch.min(coverage[:, j, :], current_attention[:, j, :]), dim=1)
                        else:
                            att_sum = att_sum + torch.sum(
                                torch.min(coverage[:, j, :], current_attention[:, j, :]), dim=1)
                    org_word_index = org_word_index.view(output.shape[0] * output.shape[1])
                    output = output.view(output.shape[0] * output.shape[1], output.shape[2])
                    le1 = int(org_word_index.shape[0] / 2)
                    # Damp flattened output rows from each target's length up to le1.
                    for ind in range(0, batch_size):
                        for op in range(dec_lengths[ind], le1):
                            output[op] *= 1e-30
                    loss = criterion(output, org_word_index) + torch.mean(att_sum)
                    print(loss.item())
                    if loss.item() < 50:
                        tot_loss += loss.item()
                        model.zero_grad()
                        loss.backward()
                        optimizer.step()
                        chats_completed += batch_size
                    else:
                        torch.cuda.empty_cache()
                print(torch.cuda.memory_allocated(device=device) * math.pow(10, -9))
                txt_file.write("Movie Completed " + str(count))
                txt_file.write("\n")
                torch.cuda.empty_cache()
                if count % 100 == 0:
                    save_model(epoch, loss, optimizer, model)
        print("Epoch Completed")
    txt_file.close()
def test__both_truncate():
    assert truncate(["abc", "def"], char_limit=6, nl_limit=1)[:-1] == ["abc"]
    assert truncate(["abc", "def"], char_limit=3, nl_limit=2)[:-1] == ["abc"]
def test__one_truncate():
    assert truncate(["abc", "def"], char_limit=4)[:-1] == ["abc", "d"]
    assert truncate(["abc", "def"], nl_limit=1)[:-1] == ["abc"]
def test__no_truncate():
    assert truncate(["abc", "def"]) == ["abc", "def"]
    assert truncate(["abc", "def"], char_limit=72) == ["abc", "def"]
    assert truncate(["abc", "def"], nl_limit=3) == ["abc", "def"]
    assert truncate(["abc", "def"], char_limit=6, nl_limit=2) == ["abc", "def"]
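# The list-based truncate exercised by the three tests above is not shown.
# A sketch consistent with the assertions, assuming char_limit caps total
# characters, nl_limit caps the number of lines, and a trailing marker element
# (stripped by the tests' [:-1]) is appended whenever truncation occurs:
def truncate(lines, char_limit=None, nl_limit=None, marker="..."):
    out, used, truncated = [], 0, False
    for line in lines:
        if nl_limit is not None and len(out) >= nl_limit:
            truncated = True
            break
        if char_limit is not None and used + len(line) > char_limit:
            take = char_limit - used
            if take > 0:
                out.append(line[:take])  # keep the partial line that still fits
            truncated = True
            break
        out.append(line)
        used += len(line)
    if truncated:
        out.append(marker)
    return out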
def _truncator(s):
    """Default configuration for the output truncator."""
    return truncate.truncate(s, 200, 4)
def test_input_10(self):
    # Failure message:
    # expected truncate("Holy guacamole!", 152) to equal "Holy guacamole!"
    self.assertEqual(truncate("Holy guacamole!", 152), "Holy guacamole!")
def load_batches(epoch, txtfile, verbose=1, samples_per_batch=4000):
    '''Generator for loading batches of frames'''
    # txtfile persists the number of the last data dump opened, so loading
    # can resume across runs.
    if os.path.exists(txtfile):
        f = open(txtfile, 'r')
        data_dump_no = int(f.read())
        f.close()
    else:
        f = open(txtfile, 'w+')
        data_dump_no = 1
        f.write(str(data_dump_no))
        f.close()
    data_dump = 'dataset_minimap_800x600-'
    data = data_dump + str(data_dump_no) + '.pz'
    print(data)
    print('Opening dump.no {}'.format(data_dump_no))
    dataset = gzip.open(data)
    batch_count = 0
    # y = []  # outputs
    x = []  # input
    s = []  # steering
    # t = []  # throttle
    # b = []  # brake
    count = 0
    while True:
        try:
            print('----------- On Epoch: ' + str(epoch) + ' ----------')
            print('----------- On Batch: ' + str(batch_count) + ' ----------')
            while count < samples_per_batch:
                data_dict = pickle.load(dataset)
                # Steering in dict is between -1 and 1; scale to between 0 and
                # 999, then truncate to 0..34 for categorical input.
                steering = int(float(data_dict['steering']) * 750) + 500
                if steering >= 1000:
                    steering = 999
                if steering < 0:
                    steering = 0
                if 470 <= steering <= 530:  # skip near-neutral steering samples
                    continue
                image = frame2numpy(data_dict['frame'], (800, 600))
                image = minimap_processing(image)
                # cv2.imshow('original', image)
                # if cv2.waitKey(1) & 0xFF == ord('q'):
                #     break
                image = (image / 255 - .5) * 2  # normalize to [-1, 1]
                x.append(image)
                steering = truncate(steering)
                s.append(steering)
                # # Throttle in dict is between 0 and 1, scale to between 0 and
                # # 49 for categorical input
                # throttle = int(data_dict['throttle'] * 50)
                # if throttle >= 50:
                #     throttle = 49
                # if throttle < 0:
                #     throttle = 0
                # t.append(throttle)
                # # Brake in dict is between 0 and 1, scale to between 0 and 49
                # # for categorical input
                # brake = int(data_dict['brake'] * 50)
                # if brake >= 50:
                #     brake = 49
                # if brake < 0:
                #     brake = 0
                # b.append(brake)
                count += 1
                if (count % 250) == 0 and verbose == 1:
                    print(' ' + str(count) + ' data points loaded in batch.')
            count = 0
            print('Batch loaded.')
            s = np_utils.to_categorical(s, num_classes=35)
            # t = np_utils.to_categorical(t, num_classes=50)
            # b = np_utils.to_categorical(b, num_classes=50)
            # y = np.hstack([s, b])
            # Train test split
            x_train, x_test, y_train, y_test = train_test_split(
                x, s, test_size=0.2, random_state=1, shuffle=True)
            # y = []
            x = []
            s = []
            # t = []
            # b = []
            batch_count += 1
            yield x_train, y_train, x_test, y_test, samples_per_batch, batch_count
        except EOFError:
            # Current dump exhausted: advance to the next file.
            data_dump_no += 1
            f = open(txtfile, 'w+')
            f.write(str(data_dump_no))
            f.close()
            print('Opening dump.no {}'.format(data_dump_no))
            data = data_dump + str(data_dump_no) + '.pz'
            if os.path.exists(data):
                dataset = gzip.open(data)
            else:
                print('File Completed')
                break
        except Exception as e:
            print(e)
            data_dump_no += 1
            f = open(txtfile, 'w+')
            f.write(str(data_dump_no))
            f.close()
            print('Data of this file is corrupted after this point. '
                  "Don't worry, opening next file")
            print('Opening dump.no {}'.format(data_dump_no))
            data = data_dump + str(data_dump_no) + '.pz'
            if os.path.exists(data):
                dataset = gzip.open(data)
            else:
                print(e)
                print('Conversion Complete. Yay!!!!!!')
                break
def dmtruncate(value, length=1):
    return truncate(value, length=length)
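# dmtruncate reads like a template-filter wrapper. If it belongs to a Django
# app (an assumption, not confirmed by the source), it would be registered
# through a template Library; `truncate` here is the project's own helper:
from django import template

register = template.Library()

@register.filter(name='dmtruncate')
def dmtruncate_filter(value, length=1):
    return truncate(value, length=length)

# Usage in a template would then be: {{ some_text|dmtruncate:10 }}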
def test_input_9(self):
    # Failure message:
    # expected truncate("Yo", 100) to equal "Yo"
    self.assertEqual(truncate("Yo", 100), "Yo")
def test_ok(self):
    self.assertEqual(truncate('heqingyang', 5), 'heqin...')
    self.assertEqual(truncate('heqingyang', 20), 'heqingyang')
def test_right_length(self):
    # Failure message:
    # expected truncate("Hey", 3) to equal "..."
    self.assertEqual(truncate("Hey", 3), "...")
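# The "Failure message" tests above pin down one truncate contract: a limit
# below 3 is rejected, strings shorter than num pass through, and anything of
# length >= num becomes its first num - 3 characters plus "...". A sketch that
# satisfies those assertions (test_ok above implies a different contract,
# keeping num characters before the ellipsis, and appears to come from a
# different codebase):
def truncate(string, num):
    if num < 3:
        return "Truncation must be at least 3 characters."
    if len(string) >= num:
        return string[:num - 3] + "..."
    return string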
def truncate_share_folder_index(share_name):
    share = oscar.get_share(share_name)
    if not share:
        return (False, "SHARENOTEXIST")
    if not os.path.isdir(share.path):
        return (False, "DIRNOTEXIST")
    rst = truncate.truncate(share.path)
    return (rst, None)