def do_train(args):
    """Build the classifier and fit it on the training split.

    Loads the word embeddings named by ``args.vectors``, builds the train and
    dev ``util.Data`` sets from ``args.data_train`` / ``args.data_dev``, fills
    in the data-dependent ``Config`` fields, and then runs ``model.fit`` inside
    a fresh TensorFlow graph and session. The score returned by ``fit`` is
    logged together with the elapsed time.

    Args:
        args: Parsed options; this function reads ``vectors``, ``data_train``,
            ``data_dev`` and ``ltp_data`` and hands the whole object to
            ``Config``.
    """
    embeddings, vocab = util.load_word_embedding(
        input_file=args.vectors, cache='cache')

    # The stopword list is loaded and then immediately overwritten with None,
    # so both datasets below receive ``stopwords=None``.
    stopwords = util.load_stopwords()
    stopwords = None

    train_set = util.Data(args.data_train, args.ltp_data, stopwords=stopwords)
    dev_set = util.Data(
        args.data_dev,
        args.ltp_data,
        max_length=train_set.max_length,
        stopwords=stopwords,
    )

    config = Config(args)
    print(train_set.max_length)

    # Configure the data-dependent parameters.
    # (Open question left by the original author: how should the test set be
    # configured?)
    _, config.max_length = train_set.get_metadata()
    config.n_classes = len(train_set.LABELS)
    config.n_word_embed_size = len(embeddings[0])

    with tf.Graph().as_default():
        logger.info("Building model...",)
        started_at = time.time()
        model = Classifier(embeddings, vocab, config)
        logger.info("took %.2f seconds", time.time() - started_at)

        init_op = tf.global_variables_initializer()
        saver = tf.train.Saver()
        # Let TensorFlow grow GPU memory on demand.
        session_config = tf.ConfigProto(
            gpu_options=tf.GPUOptions(allow_growth=True))

        with tf.Session(config=session_config) as session:
            session.run(init_op)
            score = model.fit(session, saver, train_set, dev_set)
            print("\n")
            # The reported time is measured from just before the model build.
            logger.info(
                "training finished, took %.2f seconds with P: %.2f",
                time.time() - started_at,
                score,
            )
def do_predict(args):
    """Restore a trained classifier and write predictions for the test split.

    Rebuilds the training ``util.Data`` set (its ``max_length``, metadata and
    ``LABELS`` are used to configure the model), builds the test set from
    ``args.data_test``, restores the checkpoint at
    ``model.config.output_model`` and runs ``model.output`` on the test data.
    The labels and predictions are printed, and the predictions are stored via
    ``test_data.update_labels(...).save_result()``.

    Args:
        args: Parsed options; this function reads ``vectors``, ``data_train``,
            ``data_test`` and ``ltp_data`` and hands the whole object to
            ``Config``.
    """
    embeddings, vocab = util.load_word_embedding(
        input_file=args.vectors, cache='cache')

    # The stopword list is loaded and then immediately overwritten with None,
    # so both datasets below receive ``stopwords=None``.
    stopwords = util.load_stopwords()
    stopwords = None

    train_set = util.Data(args.data_train, args.ltp_data, stopwords=stopwords)
    test_set = util.Data(
        args.data_test,
        args.ltp_data,
        max_length=train_set.max_length,
        stopwords=stopwords,
    )

    config = Config(args)

    # Configure the data-dependent parameters.
    # (Open question left by the original author: how should the test set be
    # configured?)
    _, config.max_length = train_set.get_metadata()
    config.n_classes = len(train_set.LABELS)
    config.n_word_embed_size = len(embeddings[0])
    # The batch size is set to the number of test records.
    config.batch_size = len(test_set.data)

    with tf.Graph().as_default():
        logger.info("Building model...",)
        started_at = time.time()
        model = Classifier(embeddings, vocab, config)
        logger.info("took %.2f seconds", time.time() - started_at)

        init_op = tf.global_variables_initializer()
        saver = tf.train.Saver()
        # Let TensorFlow grow GPU memory on demand.
        session_config = tf.ConfigProto(
            gpu_options=tf.GPUOptions(allow_growth=True))

        with tf.Session(config=session_config) as session:
            session.run(init_op)
            # Restoring after the initializer replaces the fresh values with
            # the checkpointed ones.
            saver.restore(session, model.config.output_model)

            labels, prediction = model.output(session, test_set, None)
            print(labels)
            print(prediction)

            relabelled = test_set.update_labels(prediction)
            relabelled.save_result()
def svhn_test(model, num_label=500):
    """Train ``model`` on SVHN with a limited label budget and score it.

    The ``svhn_cropped`` train and test splits are loaded through TFDS,
    one-hot encoded over 10 classes, rescaled to [0, 1] as float32 and passed
    through ``whiten_norm``. The training data is split into labeled and
    unlabeled parts with ``util.label_unlabel_split`` and then into
    training/validation sets (0.9 / 0.1) before ``model.fit`` is called.
    Accuracy is measured on the first 1000 test samples only.

    Args:
        model: Object exposing ``fit(train, valid)``, ``predict(X)`` and
            ``name``.
        num_label: Forwarded to ``util.label_unlabel_split`` (presumably the
            number of labeled samples to keep; confirm against that helper).

    Returns:
        ``(model, {'hist': hist, 'acc': acc})`` where ``hist`` is whatever
        ``model.fit`` returned and ``acc`` is the mean categorical accuracy
        (a Python float) on the evaluated test subset.
    """
    # Load and preprocess the data on the CPU.
    with tf.device('/CPU:0'):
        X_train, y_train = tfds.as_numpy(
            tfds.load('svhn_cropped', split='train', as_supervised=True,
                      batch_size=-1))
        X_test, y_test = tfds.as_numpy(
            tfds.load('svhn_cropped', split='test', as_supervised=True,
                      batch_size=-1))

        # One-hot encode the targets.
        y_train = np.eye(10)[y_train.reshape(-1)]
        y_test = np.eye(10)[y_test.reshape(-1)]

        # Cast to float32 for image augmentation and rescale to [0, 1].
        # ``np.float`` was only an alias of the builtin ``float`` and has been
        # removed from NumPy (1.24+); a plain float divisor gives the same
        # float32 result without the AttributeError.
        X_train = X_train.astype('float32') / 255.0
        X_test = X_test.astype('float32') / 255.0

        # Whiten the data (or apply ZCA).
        X_train = whiten_norm(X_train)
        X_test = whiten_norm(X_test)

    print('loaded svfn', X_train.shape, X_test.shape)

    # Set up the test set.
    test = util.Data(X_test, y_test, None)

    # Split the training data into labeled and unlabeled parts.
    train = util.label_unlabel_split(X_train, y_train, num_label, 10)

    # Split the training data into training and validation sets.
    (train, valid) = util.train_test_valid_split(
        train.X, train.y, split=(0.9, 0.1), U=train.U)

    print('TR:', train.X.shape, train.y.shape, train.U.shape)
    print('v', valid.X.shape, valid.y.shape)

    # Fit on the GPU using the training and validation sets.
    with tf.device('/GPU:0'):
        hist = model.fit(train, valid)

    print('evaluating on (subset) of test set...')
    with tf.device('/CPU:0'):
        # Only the first 1000 test samples are evaluated.
        y_pred = model.predict(test.X[0:1000])
        acc = float(
            tf.reduce_mean(
                tf.keras.metrics.categorical_accuracy(test.y[0:1000], y_pred)))

    print(model.name, ' : acc:', acc)
    return model, {'hist': hist, 'acc': acc}
def make_request(self, is_part_upload=False, part_index=None, file_location=None, save_path_parent=None, file_name=None, is_range_download=False, part_download_queue=None, range_start=None, stop_flag_obj=None, is_last_retry=False): has_none_been_put = False chunk_size = 65536 peer_addr = self.my_http_connection.host local_addr = '' http_response = None recv_body = '' self.response.start_time = time.time() try: self.my_http_connection.connection.putrequest( self.obs_request.method, self.obs_request.url, skip_host=1) # send headers for k in self.obs_request.headers.keys(): if isinstance(self.obs_request.headers[k], list): for i in self.obs_request.headers[k]: self.my_http_connection.connection.putheader(k, i) else: self.my_http_connection.connection.putheader(k, self.obs_request.headers[ k]) self.my_http_connection.connection.endheaders() local_addr = str( self.my_http_connection.connection.sock._sock.getsockname()) peer_addr = str( self.my_http_connection.connection.sock._sock.getpeername()) logging.debug( 'Request:[%s], conn:[%s->%s], sendURL:[%s], sendHeaders:[%r], sendContent:[%s]' % ( self.obs_request.request_type, local_addr, peer_addr, self.obs_request.url, self.obs_request.headers, self.obs_request.send_content[0:1024])) if self.obs_request.content_length > 0 and not self.obs_request.send_content: if is_part_upload: with open(file_location, 'rb') as obj_to_put: obj_to_put.seek(part_index) while self.response.send_bytes < self.obs_request.content_length: if stop_flag_obj.flag: raise Exception( 'Stop Because Some Part_upload Failed') if self.obs_request.content_length - self.response.send_bytes >= chunk_size: chunk = obj_to_put.read(chunk_size) self.response.send_bytes += chunk_size else: chunk = obj_to_put.read( self.obs_request.content_length - self.response.send_bytes) self.response.send_bytes += ( self.obs_request.content_length - self.response.send_bytes) self.my_http_connection.connection.send(chunk) else: with open(file_location, 'rb') as obj_to_put: 
while self.response.send_bytes < self.obs_request.content_length: if self.obs_request.content_length - self.response.send_bytes >= chunk_size: chunk = obj_to_put.read(chunk_size) self.response.send_bytes += chunk_size else: chunk = obj_to_put.read( self.obs_request.content_length - self.response.send_bytes) self.response.send_bytes += ( self.obs_request.content_length - self.response.send_bytes) self.my_http_connection.connection.send(chunk) else: self.my_http_connection.connection.send( self.obs_request.send_content) self.response.send_bytes += len(self.obs_request.send_content) wait_response_time_start = time.time() logging.debug('total send bytes: %d, content-length: %d' % ( self.response.send_bytes, self.obs_request.content_length)) # get response http_response = self.my_http_connection.connection.getresponse( buffering=True) wait_response_time = time.time() - wait_response_time_start logging.debug('get response, wait time %.3f' % wait_response_time) # read the body content_length = int( http_response.getheader('Content-Length', '-1')) logging.debug('get ContentLength: %d' % content_length) self.response.content_length = content_length self.response.request_id = http_response.getheader( 'x-amz-request-id', '9999999999999998') self.response.id2 = http_response.getheader('x-amz-id-2', 'None') if http_response.status < 300 and self.obs_request.request_type == 'GetObject': if not is_range_download: file_path = os.path.join(save_path_parent, file_name) save_path = os.path.dirname(file_path) if not os.path.isdir(save_path): try: os.makedirs(save_path) except: pass with open(file_path, 'wb') as f: try: while True: chunk = http_response.read(65536) if not chunk: logging.info('chunk is empty, break cycle') recv_body = '[receive content], length: %d' % self.response.recv_bytes break self.response.recv_bytes += len(chunk) f.write(chunk) except Exception, e: logging.error('download file(%s) error(%s)' % ( self.obs_request.key, e)) try: os.remove(file_path) except 
Exception: pass else: count = 0 chunk_size = 65536 while not stop_flag_obj.flag: chunk = http_response.read(chunk_size) if not chunk: logging.info('chunk is empty, break cycle') recv_body = '[receive content], length: %d' % self.response.recv_bytes break self.response.recv_bytes += len(chunk) offset = range_start + chunk_size * count data = util.Data(chunk=chunk, offset=offset) while not stop_flag_obj.flag: try: part_download_queue.put(data, block=True, timeout=1) break except Full: pass else: logging.info( 'stop put data, range_start: %d' % range_start) raise Exception( 'Stop Because Some Range_download Failed') count += 1 else:
def cifar10_test(model, num_label=4000):
    """Train ``model`` on CIFAR-10 with a limited label budget and score it.

    The ``cifar10`` train and test splits are loaded through TFDS, one-hot
    encoded over 10 classes, rescaled to [0, 1] as float32 and passed through
    ``whiten_norm``. The training data is split into labeled and unlabeled
    parts with ``util.label_unlabel_split`` and then into training/validation
    sets (0.9 / 0.1) before ``model.fit`` is called. Accuracy is measured on
    the first 1000 test samples only.

    Args:
        model: Object exposing ``fit(train, valid)``, ``predict(X)`` and
            ``name``.
        num_label: Forwarded to ``util.label_unlabel_split`` (presumably the
            number of labeled samples to keep; confirm against that helper).

    Returns:
        ``(model, {'hist': hist, 'acc': acc})`` where ``hist`` is whatever
        ``model.fit`` returned and ``acc`` is the mean categorical accuracy
        (a Python float) on the evaluated test subset.
    """
    # Load and preprocess the data on the CPU.
    with tf.device('/CPU:0'):
        # Load the training and test data.
        X_train, y_train = tfds.as_numpy(
            tfds.load('cifar10', split='train', as_supervised=True,
                      batch_size=-1))
        X_test, y_test = tfds.as_numpy(
            tfds.load('cifar10', split='test', as_supervised=True,
                      batch_size=-1))

        # One-hot encode the targets.
        y_train = np.eye(10)[y_train.reshape(-1)]
        y_test = np.eye(10)[y_test.reshape(-1)]

        # Cast to float32 for image augmentation and rescale to [0, 1].
        # ``np.float`` was only an alias of the builtin ``float`` and has been
        # removed from NumPy (1.24+); a plain float divisor gives the same
        # float32 result without the AttributeError.
        X_train = X_train.astype('float32') / 255.0
        X_test = X_test.astype('float32') / 255.0

        # Whiten the data (or apply ZCA).
        X_train = whiten_norm(X_train)
        X_test = whiten_norm(X_test)

    print('loaded cifar10', X_train.shape, X_test.shape)

    # Set up the test set.
    test = util.Data(X_test, y_test, None)

    # Split the training data into labeled and unlabeled parts.
    train = util.label_unlabel_split(X_train, y_train, num_label, 10)

    # Split the training data into training and validation sets.
    (train, valid) = util.train_test_valid_split(
        train.X, train.y, split=(0.9, 0.1), U=train.U)

    print('TR:', train.X.shape, train.y.shape, train.U.shape)
    print('v', valid.X.shape, valid.y.shape)

    # Fit on the GPU using the training and validation sets.
    with tf.device('/GPU:0'):
        hist = model.fit(train, valid)

    print('evaluating on (subset) of test set...')
    with tf.device('/CPU:0'):
        # Only the first 1000 test samples are evaluated. Targets stay one-hot
        # encoded, which is the form categorical_accuracy is given here.
        y_pred = model.predict(test.X[0:1000])
        acc = float(
            tf.reduce_mean(
                tf.keras.metrics.categorical_accuracy(test.y[0:1000], y_pred)))

    print(model.name, ' : acc:', acc)
    return model, {'hist': hist, 'acc': acc}
# Evaluation-only setup: only the test-side sizes, dataset and loader are
# built here; the training-side counterparts are intentionally not created.
TEST_BUFFER_SIZE = len(input_tensor_test)
BATCH_SIZE = 64

# The test loader below keeps the final partial batch (drop_last=False), so
# the number of batches it yields is the ceiling of size / batch size. Floor
# division would under-count whenever the test set is not an exact multiple
# of BATCH_SIZE, and would give 0 for a test set smaller than one batch.
TEST_N_BATCH = (TEST_BUFFER_SIZE + BATCH_SIZE - 1) // BATCH_SIZE

# Model hyper-parameters.
embedding_dim = 256
units = 1024
vocab_inp_size = len(globalVocab.word2idx)
target_size = len(emotion_dict)

# Select CUDA device 0 for subsequent CUDA work.
torch.cuda.set_device(0)

# Wrap the test tensors in a dataset, then in a loader that visits every
# sample exactly once, in order.
test_dataset = util.Data(input_tensor_test, target_tensor_test)
test_dataset = DataLoader(test_dataset,
                          batch_size=BATCH_SIZE,
                          drop_last=False,
                          shuffle=False)
import util

# The three labelled input files handed to util.Data.
amazon, imdb, yelp = (
    "amazon_cells_labelled.txt",
    "imdb_labelled.txt",
    "yelp_labelled.txt",
)

# Build the data object with quiet=False, run its test() method and print
# whatever it returns.
v2 = util.Data(amazon, imdb, yelp, quiet=False)
res = v2.test()
print(res)
print(len(input_tensor_test))

# Dataset sizes and the batch counts derived from them. Both loaders below
# drop the last partial batch, so floor division matches what they yield.
BATCH_SIZE = 64
TRAIN_BUFFER_SIZE, TEST_BUFFER_SIZE = (
    len(input_tensor_train),
    len(input_tensor_test),
)
TRAIN_N_BATCH = TRAIN_BUFFER_SIZE // BATCH_SIZE
TEST_N_BATCH = TEST_BUFFER_SIZE // BATCH_SIZE

# Model hyper-parameters.
embedding_dim, units = 256, 1024
vocab_inp_size = len(globalVocab.word2idx)
target_size = len(emotion_dict)

# Select CUDA device 0 for subsequent CUDA work.
torch.cuda.set_device(0)

# Wrap the tensors in datasets first, then replace each name with a shuffling
# loader over that dataset.
train_dataset = util.Data(input_tensor_train, target_tensor_train)
test_dataset = util.Data(input_tensor_test, target_tensor_test)

train_dataset = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    drop_last=True,
    shuffle=True,
)
test_dataset = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    drop_last=True,
    shuffle=True,
)
# print(val_dataset.batch_size)
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")