def infer_receiver(unused_argv):
    hparams = create_hparams(FLAGS)
    input_vocab = load_obj('input_vocab')
    label_vocab = load_obj('label_vocab')
    with tf.Session() as sess:
        # Before training starts, check whether a pre-trained model needs to be restored
        cnn = TextCNN(hparams=hparams,
                      mode=tf.contrib.learn.ModeKeys.TRAIN,
                      source_vocab_table=input_vocab,
                      target_vocab_table=label_vocab,
                      scope=None,
                      extra_args=None)
        # tf.global_variables_initializer().run()
        saver = tf.train.Saver()
        chpt = tf.train.latest_checkpoint(hparams.restore_checkpoint)
        if chpt:
            if tf.train.checkpoint_exists(chpt):
                saver.restore(sess, chpt)
                print("Model has been restored from %s" % hparams.restore_checkpoint)
            else:
                print("No existing model loaded from %s, exiting" % hparams.restore_checkpoint)
                return 0
        # if hparams.debug is True:
        #     sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        eval(sess, cnn)
def main():
    day = 27  # input("What day do you want to simulate?")
    verbose = 0  # input("Verbose? (to see all stations 1, to just see overloaded ones 0)")
    data = load_obj("4day")
    start_times, end_times = initialize_start_end(data, day)
    stations = load_obj("stationsq1q2")
    stations = transform(stations)
    initialize_stations(stations)
    # pp = pprint.PrettyPrinter(indent=4)
    # pp.pprint(stations)
    # pp.pprint(end_times)
    curr_time = [int(i) for i in "00:00:00".split(":")]
    end_time = [int(i) for i in "24:00:00".split(":")]
    while greater_than(end_time, curr_time):
        # ipdb.set_trace(context=5)
        advance_time(curr_time, 60 * 5)
        start_times, end_times = update_stations(curr_time, stations, start_times, end_times)
        bad_stations = check_for_errors(stations)
        pp = pprint.PrettyPrinter(indent=4)
        # if verbose == 1 and bad_stations:
        #     pp.pprint(stations)
        # if verbose == 0 and bad_stations:
        #     pp.pprint(bad_stations)
        #     input()
        pp.pprint(bad_stations)
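# The simulation loop above relies on small helpers (advance_time, greater_than) whose
# implementations are not shown. A minimal sketch of what they could look like, assuming
# [hours, minutes, seconds] lists as used above (illustrative only, not the project's code):
def advance_time(t, seconds):
    total = t[0] * 3600 + t[1] * 60 + t[2] + seconds
    t[0], t[1], t[2] = total // 3600, (total % 3600) // 60, total % 60


def greater_than(a, b):
    # True if time a is strictly later than time b
    return a[0] * 3600 + a[1] * 60 + a[2] > b[0] * 3600 + b[1] * 60 + b[2]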
def generate_email_receiver(session, model, starting_text='<eos>'):
    """Generate text from the model.

    Args:
        session: tf.Session() object
        model: Object of type RNNLM_Model
        config: A Config() object
        starting_text: Initial text passed to the model.
    Returns:
        output: List of word idxs
    """
    # state = model.initial_state.eval()
    # Imagine tokens as a batch of size one, length len(tokens[0])
    input_vocab = load_obj('input_vocab')
    label_vocab = load_obj('label_vocab')
    tokens = input_vocab.encode_word_list(starting_text.split())
    tokens_length = len(tokens)
    # Pad with zeros if tokens_length is smaller than sequence_length, otherwise cut to sequence_length
    tokens = tokens + [0] * (model.sequence_length - tokens_length) if tokens_length <= model.sequence_length \
        else tokens[0:model.sequence_length]
    # Convert the tokens to an np array
    tokens = np.array(tokens)
    # print np.shape(tokens)
    # axis=0 is the batch dimension; in this evaluation case the batch size is 1
    tokens = np.expand_dims(tokens, axis=0)
    feed = {model.input_placeholder: tokens, model.dropout_placeholder: 1}
    y_pred = session.run(model.predictions, feed_dict=feed)
    # print np.shape(y_pred)
    # y_pred = np.squeeze(y_pred)
    # print ("(VAL) Evaluating the model using val dataset")
    # y_pred = np.expand_dims(y_pred, axis=0)
    _, prediction_result = label_vector_to_index(y_pred, label_vocab)
    # The first dimension of prediction_result is the batch size; it is 1 here, so return index 0
    prediction_result = prediction_result[0]
    # print np.shape(prediction_result)
    # print prediction_result
    return prediction_result
def RMSE(p, t, verbose=False, subject='', gp=False):
    # Base results:
    base_results = {
        'start_n': [12, 11, 12, 9],
        'start_mean': [20.75, 12.91, 5.58, 27.33],
        'start_std': [22.59, 9.87, 8.83, 30.49],
        'switch_n': [16, 10, 14, 19, 13, 14, 14, 8],
        'switch_mean': [11.13, 16, 15.29, 11.47, 11.69, 38.71, 6.86, 23.25],
        'switch_std': [8.39, 13.13, 22.06, 13.38, 11.41, 28.18, 6.55, 26.44]
    }
    if subject:
        base_r = util.load_obj('base_results')
        base_results = {
            'st_trial': base_r[subject][0],
            'st_try': base_r[subject][1],
            'sw_trial': base_r[subject][2],
            'sw_try': base_r[subject][3],
        }
    prediction = np.array(p)
    target = np.array(base_results[t])
    if gp:
        target = np.array(t)
    if verbose:
        print(f"Model Results: \n{prediction} \nBase Results: \n{target}\n")
    return np.sqrt(np.mean((prediction - target) ** 2))
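# A hedged usage sketch for RMSE: the prediction values below are made up, and 'start_mean'
# is one of the built-in baseline keys shown above.
predicted_means = [19.0, 14.2, 6.1, 25.0]  # hypothetical model output, same length as the baseline
error = RMSE(predicted_means, 'start_mean', verbose=True)
print("RMSE vs. baseline: %.2f" % error)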
def load_index():
    # Rebuild if any of the cached index files is missing
    if (not util.is_saved(INVERTED_INDEX_FILE_NAME)
            or not util.is_saved(DOC_ID_NAME_INDEX_NAME)
            or not util.is_saved(TFIDF_NAME_INDEX_NAME)):
        build_index()
    else:
        print('Found cached indexes! Using them ;)')
    _inverted_index: dict[str, indexer.Posting] = util.load_obj(INVERTED_INDEX_FILE_NAME)
    _doc_id_name_index: dict[int, str] = util.load_obj(DOC_ID_NAME_INDEX_NAME)
    _tfidf = util.load_pickle_as_pandas_df(TFIDF_NAME_INDEX_NAME)
    return {
        'tfidf': _tfidf,
        'inverted': _inverted_index,
        'did_name': _doc_id_name_index
    }
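# Hedged usage sketch: load (or build) the cached indexes and look up the postings for a term.
# The query term 'example' is illustrative; the keys follow the dict returned above.
indexes = load_index()
postings = indexes['inverted'].get('example')
doc_names = indexes['did_name']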
def GetGeoTree(all=False):
    # print os.path.exists('geotree')
    if os.path.exists('geotree'):
        tree = util.load_obj('geotree')
        return tree
    geo_hash = pandas.read_csv('tianchi_mobile_recommend_train_user.csv.subset.csv')
    geo_hash = geo_hash.dropna()
    geo_count = dict()
    rule = [(0, 0), (1, 1e5), (2, 1e5), (3, 1e5), (4, 1e4), (5, 1e3), (6, 1e3)]
    for r in rule:
        if r[0] == 0:
            split_list = ['9', 'm', 'f']
            for i in geo_hash['user_geohash']:
                util.IncDict(geo_count, i[:1])
        else:
            split_list = [i for i in geo_count.keys() if geo_count[i] > r[1] and len(i) == r[0]]
            for i in geo_hash['user_geohash']:
                if i[:r[0]] in split_list:
                    util.IncDict(geo_count, i[:r[0] + 1])
    util.save_obj(geo_count, 'geotree')
    if all:
        return geo_count
    else:
        geo_tree = {i: geo_count[i] for i in geo_count.keys() if geo_count[i] > 1e5 or len(i) == 1}
        return geo_tree
def unify_features():
    train_labels = pd.read_csv('~/Documents/thesis/dataset/dataSample/trainLabels.csv')
    section_features = load_obj('section_features')
    xref_features = load_obj('xref_features')
    opcode_1gram_features = load_obj('1gram_opcode_tfidf')
    byte_1gram_features = load_obj('1gram_byte_tfidf')
    # concat features with classes and IDs to create the dataset
    data = pd.concat([train_labels, xref_features, section_features,
                      opcode_1gram_features, byte_1gram_features], axis=1, sort=False)
    print(data.shape)
    save_obj(data, 'interim_data')
    return data
def initializeGL(self):
    gl.glClearColor(1.0, 1.0, 1.0, 0.0)
    # gl.glColor3f(0.0, 0.0, 0.0)
    # gl.glPointSize(4.0)
    # gl.glMatrixMode(gl.GL_PROJECTION)
    # gl.glLoadIdentity()
    # glu.gluOrtho2D(0.0, 640.0, 0.0, 480.0)
    gl.glViewport(0, 0, 800, 600)
    gl.glClearColor(0.0, 0.5, 0.5, 1.0)
    gl.glEnableClientState(gl.GL_VERTEX_ARRAY)
    self._vertices, self._normals, self._indices = \
        util.load_obj_with_index("monkey.obj")
    a, b = util.load_obj("monkey.obj")
    self._vbo = gl.glGenBuffers(1)
    gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._vbo)
    gl.glBufferData(gl.GL_ARRAY_BUFFER, self._vertices.size * 4,
                    self._vertices, gl.GL_STATIC_DRAW)
    self._normal_vbo = gl.glGenBuffers(1)
    gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._normal_vbo)
    gl.glBufferData(gl.GL_ARRAY_BUFFER, self._normals.size * 4,
                    self._normals, gl.GL_STATIC_DRAW)
    self._index_buffer = gl.glGenBuffers(1)
    gl.glBindBuffer(gl.GL_ELEMENT_ARRAY_BUFFER, self._index_buffer)
    gl.glBufferData(gl.GL_ELEMENT_ARRAY_BUFFER, len(self._indices) * 4,
                    (ctypes.c_uint * len(self._indices))(*self._indices),
                    gl.GL_STATIC_DRAW)
    self._shader_program = shader.LoadShaders("shader9.vs", "shader9.ps")
    gl.glEnable(gl.GL_DEPTH_TEST)
    gl.glDepthFunc(gl.GL_LESS)
def initializeGL(self):
    gl.glClearColor(1.0, 1.0, 1.0, 0.0)
    # gl.glColor3f(0.0, 0.0, 0.0)
    # gl.glPointSize(4.0)
    # gl.glMatrixMode(gl.GL_PROJECTION)
    # gl.glLoadIdentity()
    # glu.gluOrtho2D(0.0, 640.0, 0.0, 480.0)
    gl.glViewport(0, 0, 800, 600)
    gl.glClearColor(0.0, 0.5, 0.5, 1.0)
    gl.glEnableClientState(gl.GL_VERTEX_ARRAY)
    self._vertices, self._normals = util.load_obj("monkey.obj")
    self._vbo = gl.glGenBuffers(1)
    gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._vbo)
    gl.glBufferData(gl.GL_ARRAY_BUFFER, self._vertices.size * 4,
                    self._vertices, gl.GL_STATIC_DRAW)
    self._normal_vbo = gl.glGenBuffers(1)
    gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._normal_vbo)
    gl.glBufferData(gl.GL_ARRAY_BUFFER, self._normals.size * 4,
                    self._normals, gl.GL_STATIC_DRAW)
    self._shader_program = shader.LoadShaders("shader8.vs", "shader8.ps")
    gl.glEnable(gl.GL_DEPTH_TEST)
    gl.glDepthFunc(gl.GL_LESS)
def __init__(self, obj_file, name, _id, text):
    self.obj_file = obj_file
    self.name = name
    self._id = _id
    self.text = text
    if obj_file is not None:
        self.v, self.f = util.load_obj(obj_file)
def get_trees(self):
    if self.trees == []:
        trees = util.load_obj(self.filename)
        if trees is None:
            trees = self._generate_trees()
            util.save_obj(trees, self.filename)
        self.trees = trees
    return self.trees
def quantization_predict(*args):
    imu_measurements = args[0]
    if len(args) == 1:
        # load model
        kmeans = load_obj('kmeans_model.pkl')
    else:
        kmeans = args[1]
    return kmeans.predict(imu_measurements)
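# Hedged usage sketch (shapes and the pickled file name are assumptions): classify a batch of
# IMU samples either with the cached k-means model or with an already-fitted model passed in.
import numpy as np

samples = np.random.rand(10, 6)          # e.g. 10 samples x 6 IMU channels
labels = quantization_predict(samples)   # loads 'kmeans_model.pkl' internally
# labels = quantization_predict(samples, fitted_kmeans)  # or reuse a model you already have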
def get_trees(self):
    if self.trees == []:
        # attempt to load cache
        trees = util.load_obj(self.filename)
        if trees is None or trees == []:
            # not cached yet
            trees = self._generate_trees()
            util.save_obj(trees, self.filename)  # save cache
        self.trees = trees
    return self.trees
def mapview():
    # creating a map in the view
    fn = "Divvy_Trips_2017_Q3Q4/Divvy_Stations_2017_Q3Q4.csv"
    otherfn = "Divvy_Trips_2017_Q3Q4/Divvy_Trips_2017_Q3.csv"
    data = readdict(fn)
    # other_data = readdict(otherfn)
    # other_data = data_cleanup_missing(other_data)
    # frequency_dictionary, most_common = get_frequency_dictionaries(other_data, other_data[0].keys())
    # print(frequency_dictionary['gender'])
    frequency_dictionary = load_obj('frequency_dictionary')
    # frequency_dictionary, most_common = standard_procedures(otherfn)
    lat = get_attribute(data, "latitude", float)
    lon = get_attribute(data, "longitude", float)
    names = get_attribute(data, "name")
    stations = []
    stations2 = []
    for idx, l in enumerate(lat):
        stations.append((lat[idx], lon[idx], names[idx],
                         "https://maps.gstatic.com/intl/en_us/mapfiles/markers2/measle_blue.png"))
        stations2.append({
            "lat": lat[idx],
            "lng": lon[idx],
            "name": names[idx],
            "img": "https://maps.gstatic.com/intl/en_us/mapfiles/markers2/measle_blue.png"
        })
    mymap = Map(scale=2,
                identifier="view-side",
                lat=41.8781,
                lng=-87.6298,
                markers=stations,
                fit_markers_to_bounds=True,
                style="height:500px;width:100%;")
    # bikeLayer = GoogleMaps.BicyclingLayer
    # BicyclingLayer.setMap(mymap)
    stations2 = json.dumps(stations2)
    # stations2 = stations2.replace("\"", "")
    # print(stations2)
    # stations2 = json.loads(stations2)
    # print(stations2)
    # print(stations)
    return render_template('example.html',
                           mymap=mymap,
                           json_stations=stations2,
                           frequencies=json.dumps(frequency_dictionary))
def model_test():
    hparams = create_or_load_hparams(hparams_file=HFILE, default_hparams=None)
    config = tf.ConfigProto(log_device_placement=hparams.log_device_placement,
                            allow_soft_placement=hparams.allow_soft_placement)
    input_vocab = load_obj(ROOT_PATH, 'general_vocab')
    label_vocab = load_obj(ROOT_PATH, 'mpss_pl_vocab')
    with tf.Session(config=config) as sess:
        cnn = TextCNN(hparams=hparams,
                      mode=tf.contrib.learn.ModeKeys.TRAIN,
                      source_vocab_table=input_vocab,
                      target_vocab_table=label_vocab,
                      scope=None,
                      extra_args=None)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=hparams.num_checkpoints)
        chpt = tf.train.latest_checkpoint(hparams.restore_checkpoint)
        if chpt:
            if tf.train.checkpoint_exists(chpt):
                saver.restore(sess, chpt)
                print("Model has been restored from %s" % hparams.restore_checkpoint)
            else:
                print("No pre-trained model loaded, aborting!")
                return
        sess.run(tf.local_variables_initializer())
        predict_result = cnn.predict(sess=sess,
                                     input_txt=TEST_EMAIL,
                                     input_vocab=input_vocab,
                                     label_vocab=label_vocab)
        print("Predicted result is %s" % predict_result)
def to_one_csv():
    DIRPATH = ['./npy_data/tihm15/UTI_mike/', './npy_data/tihmdri/UTI_test/']
    SAVE_PATH = './csv_data/one_csv/data.csv'
    data = {'Patient_id': [], 'Date': [], 'Symptoms': [], 'Validation': []}
    for path in DIRPATH:
        filenames = _iter_directory(path)
        for f in filenames:
            validations = load_obj(path + f)
            for valid in validations[1]:
                data['Patient_id'].append(f.split('.')[0])
                data['Date'].append(valid[0])
                data['Symptoms'].append(valid[1])
                data['Validation'].append(valid[2])
    df = pd.DataFrame(data)
    df.to_csv(SAVE_PATH)
def initializeGL(self):
    gl.glClearColor(1.0, 1.0, 1.0, 0.0)
    # gl.glColor3f(0.0, 0.0, 0.0)
    # gl.glPointSize(4.0)
    # gl.glMatrixMode(gl.GL_PROJECTION)
    # gl.glLoadIdentity()
    # glu.gluOrtho2D(0.0, 640.0, 0.0, 480.0)
    gl.glViewport(0, 0, 800, 600)
    gl.glClearColor(0.0, 0.5, 0.5, 1.0)
    gl.glEnableClientState(gl.GL_VERTEX_ARRAY)
    self._vertices, self._normals = util.load_obj("monkey.obj")
    self._vbo = gl.glGenBuffers(1)
    # ar = array("f", vertices)
    gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._vbo)
    gl.glBufferData(gl.GL_ARRAY_BUFFER, self._vertices.size * 4,
                    self._vertices, gl.GL_STATIC_DRAW)
    self._shader_program = shader.LoadShaders("shader7.vs", "shader7.ps")
    gl.glEnable(gl.GL_DEPTH_TEST)
    gl.glDepthFunc(gl.GL_LESS)
def to_separate_csv():
    DIRPATH = './npy_data/tihm15/Agitation_mike/'
    SAVE_PATH = './csv_data/analysation/mike/agitation/'
    INDEX = ['Fridge', 'living room', 'Bathroom', 'Hallway', 'Bedroom',
             'Kitchen', 'Microwave', 'Kettle']
    filenames = _iter_directory(DIRPATH)
    data = []
    info = []
    for f in filenames:
        a = load_obj(DIRPATH + f)
        data.append(a[0])
        info.append(a[1])
    for i, patient in enumerate(info):
        for j, day in enumerate(patient):
            filename = filenames[i].split('.')[0] + '_' + day[0] + '_' + str(day[2]) + '.csv'
            p_data = data[i][j]
            df = pd.DataFrame(p_data, index=INDEX)
            df.to_csv(SAVE_PATH + filename)
def preprocess(dataset, explainer):
    # Dataset preparation
    data = class_name = None
    dtypes = {}
    encoded_data = None
    if dataset == 'texas':
        # class_name = 'PRINC_SURG_PROC_CODE'
        dtypes = load_obj('data/' + dataset + '/dtypes')
        data = pd.read_csv('data/' + dataset + '/' + dataset + '_mapped.csv', dtype=dtypes)
        columns2remove = ['RECORD_ID', 'PRINC_ICD9_CODE']
        data.drop(columns2remove, inplace=True, axis=1)
        print("Splitting ...")
        bb_train, bb_val, sh_train, sh_val, r2E, test = split(data, class_name)
        bb_train.to_csv('data/' + dataset + '/baseline_split/bb_train_mapped.csv', index=False)
        print("bb_train saved")
        bb_val.to_csv('data/' + dataset + '/baseline_split/bb_val_mapped.csv', index=False)
        print("bb_val saved")
        sh_train.to_csv('data/' + dataset + '/baseline_split/sh_train_mapped.csv', index=False)
        print("sh_train saved")
        sh_val.to_csv('data/' + dataset + '/baseline_split/sh_val_mapped.csv', index=False)
        print("sh_val saved")
        r2E.to_csv('data/' + dataset + '/baseline_split/r2E_mapped.csv', index=False)
        print("r2E saved")
        test.to_csv('data/' + dataset + '/baseline_split/test_mapped.csv', index=False)
    else:
        data, class_name = prepare_dataset(dataset, explainer)
        # Mapping
        mapped_data = map_columns(data, class_name)
        mapped_data.to_csv('data/' + dataset + '/' + dataset + '_mapped.csv')
        # Encoding
        if dataset == 'adult':
            class_name = 'class'
            for col in data.columns:
                if col in ['capital-gain', 'capital-loss']:
                    dtypes[col] = 'float32'
                elif col in ['age', 'hours-per-week']:
                    dtypes[col] = 'int64'
                else:
                    dtypes[col] = 'object'
        if dataset == 'mobility':
            class_name = 'class'
            for col in data.columns:
                if col in ['max_distance_from_home', 'maximum_distance', 'max_tot',
                           'distance_straight_line', 'sld_avg', 'radius_of_gyration',
                           'norm_uncorrelated_entropy', 'nlr', 'home_freq_avg',
                           'work_freq_avg', 'hf_tot_df', 'wf_tot_df', 'n_user_home_avg',
                           'n_user_work_avg', 'home_entropy', 'work_entropy']:
                    dtypes[col] = 'float32'
                elif col in ['uid', 'wait', 'number_of_visits', 'nv_avg',
                             'number_of_locations', 'raw_home_freq', 'raw_work_freq',
                             'raw_least_freq', 'n_user_home', 'n_user_work']:
                    dtypes[col] = 'int64'
                else:
                    dtypes[col] = 'object'
        encoded_data = encode_Dask_dataset(dd.from_pandas(data, npartitions=1), class_name, dtypes, [])
        # encoded_data, feature_names, class_values, numeric_columns, rdf, real_feature_names, features_map = encode_dataset(data, class_name)
        encoded_data.to_csv('data/' + dataset + '/' + dataset + '_encoded.csv')
        # Splitting both datasets
        bb_train, bb_val, sh_train, sh_val, r2E, test = split(data, class_name)
        bb_train_m, bb_val_m, sh_train_m, sh_val_m, r2E_m, test_m = split(mapped_data, class_name)
        bb_train_e, bb_val_e, sh_train_e, sh_val_e, r2E_e, test_e = split(encoded_data, class_name)
        # Writing datasets
        if (len(bb_train) + len(bb_val) + len(sh_train) + len(sh_val) + len(r2E) + len(test) == len(data)
                and len(bb_train_e) + len(bb_val_e) + len(sh_train_e) + len(sh_val_e) + len(r2E_e) + len(test_e) == len(encoded_data)):
            print('Dataset: ' + dataset)
            bb_train.to_csv('data/' + dataset + '/baseline_split/bb_train.csv', index=False)
            bb_train_m.to_csv('data/' + dataset + '/baseline_split/bb_train_mapped.csv', index=False)
            bb_train_e.to_csv('data/' + dataset + '/baseline_split/bb_train_e.csv', index=False)
            print("bb_train saved")
            bb_val.to_csv('data/' + dataset + '/baseline_split/bb_val.csv', index=False)
            bb_val_m.to_csv('data/' + dataset + '/baseline_split/bb_val_mapped.csv', index=False)
            bb_val_e.to_csv('data/' + dataset + '/baseline_split/bb_val_e.csv', index=False)
            print("bb_val saved")
            sh_train.to_csv('data/' + dataset + '/baseline_split/sh_train.csv', index=False)
            sh_train_m.to_csv('data/' + dataset + '/baseline_split/sh_train_mapped.csv', index=False)
            sh_train_e.to_csv('data/' + dataset + '/baseline_split/sh_train_e.csv', index=False)
            print("sh_train saved")
            sh_val.to_csv('data/' + dataset + '/baseline_split/sh_val.csv', index=False)
            sh_val_m.to_csv('data/' + dataset + '/baseline_split/sh_val_mapped.csv', index=False)
            sh_val_e.to_csv('data/' + dataset + '/baseline_split/sh_val_e.csv', index=False)
            print("sh_val saved")
            r2E.to_csv('data/' + dataset + '/baseline_split/r2E.csv', index=False)
            r2E_m.to_csv('data/' + dataset + '/baseline_split/r2E_mapped.csv', index=False)
            r2E_e.to_csv('data/' + dataset + '/baseline_split/r2E_e.csv', index=False)
            print("r2E saved")
            test.to_csv('data/' + dataset + '/baseline_split/test.csv', index=False)
            test_m.to_csv('data/' + dataset + '/baseline_split/test_mapped.csv', index=False)
            test_e.to_csv('data/' + dataset + '/baseline_split/test_e.csv', index=False)
            print("test saved")
        else:
            print("Error in split dataset sizes")
    with torch.no_grad():
        for i, (im, _) in enumerate(target_dataset):
            frame_tensor = torch.zeros(len(target_dataset))
            frame_tensor[i] = 1  # one-hot selector for the current frame
            frame_tensor = frame_tensor.cuda()
            deltas = torch.matmul(M3, torch.matmul(M2, torch.matmul(M1, frame_tensor))).flatten()
            deformed_vtxs = (vtx_pos.flatten() + deltas).reshape((vtx_pos.shape[0], 3))
            deformed_vtxs = torch.clamp(deformed_vtxs, -1.0, 1.0)
            # write_obj(f"frame_{i}.obj", deformed_vtxs.detach().cpu().tolist(), pos_idx.detach().cpu().tolist())
            util.write_obj(f"frame_{i}.obj", deformed_vtxs.detach().cpu().tolist(),
                           pos_idx.detach().cpu().tolist(), vtx_col.detach().cpu().tolist())
    np.savez('vtx_col.npz', vtx_col=vtx_col.cpu().detach().numpy())


if __name__ == '__main__':
    # mesh = util.load_obj('sphere.obj')
    mesh = util.load_obj('prediction.obj')
    vtx_pos = mesh['vtx_pos']
    # make all positive
    vtx_pos += vtx_pos.min()
    vtx_pos -= vtx_pos.min()
    vtx_pos /= vtx_pos.max()
    vtx_pos -= 0.5
    mesh['vtx_pos'] = vtx_pos
    for k, v in mesh.items():
        assert v.shape[1] == 3
    fit_mesh_col(mesh, 'images/bottle')
    uv, uv_idx = init_uv()
    uv_idx = uv_idx[:pos_idx.shape[0]]
    pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda()
    vtx_pos = torch.from_numpy(pos.astype(np.float32)).cuda()
    uv_idx = torch.from_numpy(uv_idx.astype(np.int32)).cuda()
    vtx_uv = torch.from_numpy(uv.astype(np.float32)).cuda()
    tex = torch.from_numpy(tex.astype(np.float32)).cuda()
    # Render reference and optimized frames. Always enable mipmapping for reference.
    color = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx, tex, 1024, False, 0)
    Image.fromarray((color[0].detach().cpu().numpy() * 255).astype(np.uint8)).save('test.png')


if __name__ == '__main__':
    mesh = util.load_obj('sphere.obj')
    # mesh = util.load_obj('prediction.obj')
    vtx_pos = mesh['vtx_pos']
    # make all positive
    vtx_pos += vtx_pos.min()
    vtx_pos -= vtx_pos.min()
    vtx_pos /= vtx_pos.max()
    vtx_pos -= 0.5
    mesh['vtx_pos'] = vtx_pos
    for k, v in mesh.items():
        assert v.shape[1] == 3
    fit_mesh(mesh, 'images/bottle')
import os
import sys

utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
utils_dir = os.path.join(utils_dir, 'utils')
sys.path.append(utils_dir)

import util

parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

wwcd = os.path.join(parent_path, 'data', 'intermediate', 'write_with_class')
write_class = util.load_obj(wwcd)

writers = []  # each entry is a (writer, [list of (file, class)]) tuple
cimages = []
(cw, _, _) = write_class[0]
for (w, f, c) in write_class:
    if w != cw:
        writers.append((cw, cimages))
        cw = w
        cimages = [(f, c)]
    cimages.append((f, c))
writers.append((cw, cimages))

ibwd = os.path.join(parent_path, 'data', 'intermediate', 'images_by_writer')
util.save_obj(writers, ibwd)
def load_treebank(filename):
    return util.load_obj(filename)
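# Most snippets here delegate persistence to util.load_obj / util.save_obj, whose implementation
# is not shown. A minimal pickle-based sketch of such helpers (an assumption, not the actual
# util module; some callers pass names with '.pkl' already appended):
import pickle


def save_obj(obj, name):
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)


def load_obj(name):
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)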
def run(args, num_workers=1, log_interval=100, verbose=True, save_path=None):
    code_root = os.path.dirname(os.path.realpath(__file__))
    if not os.path.isdir('{}/{}_result_files/'.format(code_root, args.task)):
        os.mkdir('{}/{}_result_files/'.format(code_root, args.task))
    path = '{}/{}_result_files/'.format(code_root, args.task) + utils.get_path_from_args(args)
    print('File saved in {}'.format(path))

    if os.path.exists(path + '.pkl') and not args.rerun:
        print('Result file already exists; loading it. Use --rerun to recompute.')
        return utils.load_obj(path)

    start_time = time.time()
    utils.set_seed(args.seed)

    # ---------------------------------------------------------
    # -------------------- training ---------------------------

    # initialise model
    model = user_preference_estimator(args).cuda()
    model.train()
    print(sum([param.nelement() for param in model.parameters()]))
    # set up meta-optimiser for model parameters
    meta_optimiser = torch.optim.Adam(model.parameters(), args.lr_meta)
    # scheduler = torch.optim.lr_scheduler.StepLR(meta_optimiser, 5000, args.lr_meta_decay)

    # initialise logger
    logger = Logger()
    logger.args = args
    # initialise the starting point for the meta gradient (it's faster to copy this than to create a new object)
    meta_grad_init = [0 for _ in range(len(model.state_dict()))]
    dataloader_train = DataLoader(Metamovie(args),
                                  batch_size=1,
                                  num_workers=args.num_workers)
    for epoch in range(args.num_epoch):
        x_spt, y_spt, x_qry, y_qry = [], [], [], []
        iter_counter = 0
        for step, batch in enumerate(dataloader_train):
            if len(x_spt) < args.tasks_per_metaupdate:
                x_spt.append(batch[0][0].cuda())
                y_spt.append(batch[1][0].cuda())
                x_qry.append(batch[2][0].cuda())
                y_qry.append(batch[3][0].cuda())
                if not len(x_spt) == args.tasks_per_metaupdate:
                    continue
            if len(x_spt) != args.tasks_per_metaupdate:
                continue

            # initialise meta-gradient
            meta_grad = copy.deepcopy(meta_grad_init)
            loss_pre = []
            loss_after = []
            for i in range(args.tasks_per_metaupdate):
                loss_pre.append(F.mse_loss(model(x_qry[i]), y_qry[i]).item())
                fast_parameters = model.final_part.parameters()
                for weight in model.final_part.parameters():
                    weight.fast = None
                for k in range(args.num_grad_steps_inner):
                    logits = model(x_spt[i])
                    loss = F.mse_loss(logits, y_spt[i])
                    grad = torch.autograd.grad(loss, fast_parameters, create_graph=True)
                    fast_parameters = []
                    for k, weight in enumerate(model.final_part.parameters()):
                        if weight.fast is None:
                            weight.fast = weight - args.lr_inner * grad[k]  # create weight.fast
                        else:
                            weight.fast = weight.fast - args.lr_inner * grad[k]
                        fast_parameters.append(weight.fast)
                logits_q = model(x_qry[i])
                # loss_q is overwritten each step and keeps only the loss of the last update step
                loss_q = F.mse_loss(logits_q, y_qry[i])
                loss_after.append(loss_q.item())
                task_grad_test = torch.autograd.grad(loss_q, model.parameters())
                for g in range(len(task_grad_test)):
                    meta_grad[g] += task_grad_test[g].detach()

            # -------------- meta update --------------
            meta_optimiser.zero_grad()
            # set gradients of parameters manually
            for c, param in enumerate(model.parameters()):
                param.grad = meta_grad[c] / float(args.tasks_per_metaupdate)
                param.grad.data.clamp_(-10, 10)
            # the meta-optimiser only operates on the shared parameters, not the context parameters
            meta_optimiser.step()
            # scheduler.step()
            x_spt, y_spt, x_qry, y_qry = [], [], [], []

            loss_pre = np.array(loss_pre)
            loss_after = np.array(loss_after)
            logger.train_loss.append(np.mean(loss_pre))
            logger.valid_loss.append(np.mean(loss_after))
            logger.train_conf.append(1.96 * np.std(loss_pre, ddof=0) / np.sqrt(len(loss_pre)))
            logger.valid_conf.append(1.96 * np.std(loss_after, ddof=0) / np.sqrt(len(loss_after)))
            logger.test_loss.append(0)
            logger.test_conf.append(0)
            utils.save_obj(logger, path)

            # print current results
            logger.print_info(epoch, iter_counter, start_time)
            start_time = time.time()
            iter_counter += 1
        if epoch % 2 == 0:
            print('saving model at iter', epoch)
            logger.valid_model.append(copy.deepcopy(model))
    return logger, model
INDEX_TYPE = 4
INDEX_DIR = "./Indices/INDEX{0}/".format(INDEX_TYPE)

REMOVE_STOP_WORDS_QUERY = False
STEM_QUERY = False
if INDEX_TYPE == 2:
    REMOVE_STOP_WORDS_QUERY = True
elif INDEX_TYPE == 3:
    STEM_QUERY = True
elif INDEX_TYPE >= 4:
    REMOVE_STOP_WORDS_QUERY = True
    STEM_QUERY = True

INDEX = INDEX_DIR + "INDEX.dat"
CATALOG = util.load_obj(INDEX_DIR + "CATALOG.pkl")
DOC_LEN_MAP = util.load_obj(INDEX_DIR + "DOC_LEN_MAP.pkl")
META_DATA = util.load_obj(INDEX_DIR + "META.pkl")
DOC_ID_MAP = util.load_obj('DOC_ID_MAP.pkl')
ID_DOC_MAP = {v: k for k, v in DOC_ID_MAP.items()}

AVG_DOC_LENGTH = META_DATA['average_doc_len']
TOTAL_DOCS = META_DATA['total_docs']
TOTAL_TOKENS = META_DATA['total_tokens']
VOCAB_LEN = META_DATA['vocab_len']

QUERY_FILE = "query_desc.51-100.short.txt"
NO_OF_TOP_RESULTS = 1000
MODELS = ['tfidf', 'bm25', 'laplace', 'rsv', 'prox_rsv']
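# The constants above feed the retrieval models listed in MODELS. As a point of reference, a
# minimal Okapi BM25 term-score sketch built on those globals; the tf/df arguments and the
# k1/b defaults are illustrative assumptions, not the project's actual scoring code.
import math


def bm25_term_score(tf, df, doc_len, k1=1.2, b=0.75):
    # inverse document frequency from collection statistics
    idf = math.log((TOTAL_DOCS - df + 0.5) / (df + 0.5) + 1.0)
    # term frequency normalised by document length relative to the collection average
    norm_tf = (tf * (k1 + 1)) / (tf + k1 * (1 - b + b * doc_len / AVG_DOC_LENGTH))
    return idf * norm_tf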
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import util

co_li = util.load_obj('cookies.pkl')
cookies = util.get_cookie_dir(co_li)
print(cookies)
def GenFeature(finput='user_action_train.csv',
               fitem='tianchi_mobile_recommend_train_item.csv',
               foutput='feature.csv',
               lastday='2014-12-18'):
    geotree = util.load_obj('geotree')
    user_itemgeo_count_before_lastday = dict()
    user_itemgeo_car_before_lastday = dict()
    user_itemgeo_star_before_lastday = dict()
    user_itemgeo_buy_before_lastday = dict()
    user_itemgeo_count_lastday = dict()
    user_itemgeo_car_lastday = dict()
    user_itemgeo_star_lastday = dict()
    user_itemgeo_buy_lastday = dict()
    item_itemgeo = dict()
    user_count_lastday = dict()
    user_star_lastday = dict()
    user_car_lastday = dict()
    user_buy_lastday = dict()
    user_count_before_lastday = dict()
    user_star_before_lastday = dict()
    user_car_before_lastday = dict()
    user_buy_before_lastday = dict()
    user_geocount = dict()
    user_items = set()

    itemfile = open(fitem, 'rb')
    itemreader = csv.reader(itemfile, delimiter=',')
    for row in itemreader:
        tid = row[0]
        if len(row[1]) > 1:
            getItemGeoDict(item_itemgeo, com.GeoMatch(row[1], geotree), tid)
        else:
            item_itemgeo[tid] = []
            item_itemgeo[tid].append(-1)
        # if len(row[1]) > 1:
        #     item_itemgeo[tid] = com.GeoMatch(row[1], geotree)
        # else:
        #     item_itemgeo[tid] = -1

    with open(finput, 'rb') as f:
        reader = csv.reader(f, delimiter=',')
        header = reader.next()
        print header
        i = 0
        for row in reader:
            uid = row[0]
            tid = row[1]
            cid = row[4]
            user_items.add('%s_%s' % (uid, tid))
            if tid not in item_itemgeo:
                continue
            if item_itemgeo[tid][0] != -1:
                if row[5][:10] == lastday:
                    if row[2] == '1':
                        for itemgeohash in item_itemgeo[tid]:
                            IncDict(user_itemgeo_count_lastday, '%s_%s' % (uid, itemgeohash), len(item_itemgeo[tid]))
                    elif row[2] == '2':
                        for itemgeohash in item_itemgeo[tid]:
                            IncDict(user_itemgeo_star_lastday, '%s_%s' % (uid, itemgeohash), len(item_itemgeo[tid]))
                    elif row[2] == '3':
                        for itemgeohash in item_itemgeo[tid]:
                            IncDict(user_itemgeo_car_lastday, '%s_%s' % (uid, itemgeohash), len(item_itemgeo[tid]))
                    elif row[2] == '4':
                        for itemgeohash in item_itemgeo[tid]:
                            IncDict(user_itemgeo_buy_lastday, '%s_%s' % (uid, itemgeohash), len(item_itemgeo[tid]))
                else:
                    if row[2] == '1':
                        for itemgeohash in item_itemgeo[tid]:
                            IncDict(user_itemgeo_count_before_lastday, '%s_%s' % (uid, itemgeohash), len(item_itemgeo[tid]))
                    elif row[2] == '2':
                        for itemgeohash in item_itemgeo[tid]:
                            IncDict(user_itemgeo_star_before_lastday, '%s_%s' % (uid, itemgeohash), len(item_itemgeo[tid]))
                    elif row[2] == '3':
                        for itemgeohash in item_itemgeo[tid]:
                            IncDict(user_itemgeo_car_before_lastday, '%s_%s' % (uid, itemgeohash), len(item_itemgeo[tid]))
                    elif row[2] == '4':
                        for itemgeohash in item_itemgeo[tid]:
                            IncDict(user_itemgeo_buy_before_lastday, '%s_%s' % (uid, itemgeohash), len(item_itemgeo[tid]))
            i = i + 1
            if i % 100000 == 0:
                print 'processed %d scores!' % i

    fd = open(foutput, 'wb')
    fw = csv.writer(fd, delimiter=',')
    fw.writerow([
        'user_itemgeo_count_before_lastday', 'user_itemgeo_car_before_lastday',
        'user_itemgeo_star_before_lastday', 'user_itemgeo_buy_before_lastday',
        'user_itemgeo_count_lastday', 'user_itemgeo_car_lastday',
        'user_itemgeo_star_lastday', 'user_itemgeo_buy_lastday'
    ])
    for key in user_items:
        uid, tid = key.split('_')
        if tid not in item_itemgeo:
            data = ['', '', '', '', '', '', '', '', '', '']
            continue
        if item_itemgeo[tid][0] != -1:
            # data = np.zeros(8)
            temp_count_lastday = 0
            temp_star_lastday = 0
            temp_car_lastday = 0
            temp_buy_lastday = 0
            temp_count_before_lastday = 0
            temp_star_before_lastday = 0
            temp_car_before_lastday = 0
            temp_buy_before_lastday = 0
            for itemgeohash in item_itemgeo[tid]:
                key_uid_itemgeo = '%s_%s' % (uid, itemgeohash)
                # each item may map to several geohashes, so average the features over them
                temp_count_lastday += GetDict(user_itemgeo_count_lastday, key_uid_itemgeo) / len(item_itemgeo[tid])
                temp_star_lastday += GetDict(user_itemgeo_star_lastday, key_uid_itemgeo) / len(item_itemgeo[tid])
                temp_car_lastday += GetDict(user_itemgeo_car_lastday, key_uid_itemgeo) / len(item_itemgeo[tid])
                temp_buy_lastday += GetDict(user_itemgeo_buy_lastday, key_uid_itemgeo) / len(item_itemgeo[tid])
                temp_count_before_lastday += GetDict(user_itemgeo_count_before_lastday, key_uid_itemgeo) / len(item_itemgeo[tid])
                temp_star_before_lastday += GetDict(user_itemgeo_star_before_lastday, key_uid_itemgeo) / len(item_itemgeo[tid])
                temp_car_before_lastday += GetDict(user_itemgeo_car_before_lastday, key_uid_itemgeo) / len(item_itemgeo[tid])
                temp_buy_before_lastday += GetDict(user_itemgeo_buy_before_lastday, key_uid_itemgeo) / len(item_itemgeo[tid])
            data = [
                uid, tid, temp_count_lastday, temp_star_lastday,
                temp_car_lastday, temp_buy_lastday, temp_count_before_lastday,
                temp_star_before_lastday, temp_car_before_lastday,
                temp_buy_before_lastday
            ]
            fw.writerow(data)
        # else:
    com.FillAvgData(foutput, '%s_filled.csv' % foutput, log=True)
def GetFeature(data):
    cluster = util.load_obj('%s_cluster.model' % __fname__)
    cluster_ids = cluster.predict(
        data[[i for i in data.keys() if i not in ['user_id', 'item_id', 'buy']]])
    data['cluster_0'] = cluster_ids == 0
    data['cluster_1'] = cluster_ids == 1
    data['cluster_2'] = cluster_ids == 2
    data['cluster_3'] = cluster_ids == 3
    data['cluster_4'] = cluster_ids == 4
    data['user_is_robot'] = RobotRule(data).astype(int)
    data['item_nouser'] = NouserItem(data).astype(int)
    data['user_left_item'] = UserLeftItem(data).astype(int)
    data['user_high_and_active'] = ((data['user_lastday_count'] > 5) &
                                    (data['user_action_count'] > 100)).astype(int)
    data['item_is_new'] = ((data['item_lastday_count'] > 8) &
                           (data['item_before_halfmonth_click'] == 0)).astype(int)
    data['user_is_new'] = ((data['user_lastday_count'] > 0) &
                           (data['user_action_count'] < 5 * data['user_lastday_count'])).astype(int)
    data['user_lastday_active'] = (data['user_lastday_count'] > 100).astype(int)
    data['user_active_item_active_nobuy'] = ((data['user_lastday_count'] > 10) &
                                             (data['item_lastday_count'] > 10) &
                                             (data['user_item_click_nobuy'])).astype(int)
    data['user_like_watch'] = (data['user_action_count'] / (1 + data['user_lastday_count']) > 20) & \
                              (data['user_buy_count'] == 0)
    data['user_almost_buy'] = ((data['user_item_lastday_click_nobuy']) &
                               (data['user_item_lastday_cart_nobuy']) &
                               (data['user_item_lastday_star_nobuy'])).astype(int)

    nolog = ['user_id', 'item_id', 'buy']
    factor_features = [
        "user_high_and_active", "item_is_new", "user_is_new", "user_lastday_active",
        "user_active_item_active_nobuy", "user_like_watch",
        "cluster_0", "cluster_1", "cluster_2", "cluster_3", "cluster_4",
        "cat_lastday_buy_again", "cat_lastday_cart_nobuy", "cat_lastday_click_nobuy",
        "cat_lastday_star_nobuy",
        "item_geo_4", "item_geo_9", "item_geo_f", "item_geo_m", "item_geo_t",
        "item_lastday_buy_again", "item_lastday_cart_nobuy", "item_lastday_click_nobuy",
        "item_lastday_star_nobuy",
        "user_cat_lastday_buy_again", "user_cat_lastday_cart_nobuy",
        "user_cat_lastday_click_nobuy", "user_cat_lastday_star_nobuy",
        "user_geo_4", "user_geo_5", "user_geo_9", "user_geo_b", "user_geo_f",
        "user_geo_i", "user_geo_m", "user_geo_o", "user_geo_t", "user_geo_v",
        "user_in_hot_pos_1", "user_in_hot_pos_2", "user_in_hot_pos_3",
        "user_in_hot_pos_4", "user_in_hot_pos_5", "user_in_hot_pos_6", "user_in_hot_pos_7",
        "user_item_buy_again", "user_item_cart_nobuy", "user_item_click_nobuy",
        "user_item_lastday_buy_again", "user_item_lastday_cart_nobuy",
        "user_item_lastday_click_nobuy", "user_item_lastday_star_nobuy",
        "user_item_star_nobuy",
        "user_is_robot", "item_nouser", "user_left_item",
    ]
    signed_log_features = [
        "user_cat_aveThreeDayDelta_add_car", "user_cat_aveThreeDayDelta_buy",
        "user_cat_aveThreeDayDelta_click", "user_cat_aveThreeDayDelta_star",
        "user_item_aveThreeDayDelta_add_car", "user_item_aveThreeDayDelta_buy",
        "user_item_aveThreeDayDelta_click", "user_item_aveThreeDayDelta_star",
    ]
    log_features = [
        "cat_add_car", "cat_add_star", "cat_before_halfmonth_add_car",
        "cat_before_halfmonth_buy", "cat_before_halfmonth_click", "cat_before_halfmonth_star",
        "cat_buy_count", "cat_buy_user_number", "cat_click_count",
        "cat_halfmonth_add_car", "cat_halfmonth_buy", "cat_halfmonth_click", "cat_halfmonth_star",
        "cat_lastday_add_car", "cat_lastday_buy", "cat_lastday_click", "cat_lastday_star",
        "cat_lastweek_add_car", "cat_lastweek_buy", "cat_lastweek_click", "cat_lastweek_star",
        "geo_users_number", "item_added_car", "item_added_start",
        "item_before_halfmonth_add_car", "item_before_halfmonth_buy",
        "item_before_halfmonth_click", "item_before_halfmonth_star",
        "item_buy_count", "item_buy_user_number", "item_click_count",
        "item_halfmonth_add_car", "item_halfmonth_buy", "item_halfmonth_click", "item_halfmonth_star",
        "item_lastday_add_car", "item_lastday_buy", "item_lastday_click",
        "item_lastday_count", "item_lastday_star",
        "item_lastweek_add_car", "item_lastweek_buy", "item_lastweek_click", "item_lastweek_star",
        "user_action_count", "user_add_car", "user_add_star",
        "user_buy_cat_number", "user_buy_count", "user_buy_item_number",
        "user_cat_before_halfmonth_add_car", "user_cat_before_halfmonth_buy",
        "user_cat_before_halfmonth_click", "user_cat_before_halfmonth_star",
        "user_cat_count", "user_cat_halfmonth_add_car", "user_cat_halfmonth_buy",
        "user_cat_halfmonth_click", "user_cat_halfmonth_star",
        "user_cat_lastday_add_cart", "user_cat_lastday_add_star",
        "user_cat_lastday_buy", "user_cat_lastday_count",
        "user_cat_lastweek_add_car", "user_cat_lastweek_buy",
        "user_cat_lastweek_click", "user_cat_lastweek_star",
        "user_item_before_halfmonth_add_car", "user_item_before_halfmonth_buy",
        "user_item_before_halfmonth_click", "user_item_before_halfmonth_star",
        "user_item_buy", "user_item_count",
        "user_item_halfmonth_add_car", "user_item_halfmonth_buy",
        "user_item_halfmonth_click", "user_item_halfmonth_star",
        "user_item_lastday_add_cart", "user_item_lastday_add_star",
        "user_item_lastday_buy", "user_item_lastday_count", "user_item_lasttime",
        "user_item_lastweek_add_car", "user_item_lastweek_buy",
        "user_item_lastweek_click", "user_item_lastweek_star",
        "user_lastday_add_cart", "user_lastday_add_star", "user_lastday_buy", "user_lastday_count",
        "usergeo_item_before_lastday_buy", "usergeo_item_before_lastday_cart",
        "usergeo_item_before_lastday_click", "usergeo_item_before_lastday_star",
        "usergeo_item_lastday_buy", "usergeo_item_lastday_cart",
        "usergeo_item_lastday_click", "usergeo_item_lastday_star",
    ]
    linear_features = [
        "user_item_geo_distance",
    ]
    X1 = np.log(.3 + data[log_features])
    X2 = data[factor_features]
    X3 = data[linear_features]
    X4 = np.copysign(np.log(.3 + np.abs(data[signed_log_features])),
                     np.sign(data[signed_log_features]))
    X = pandas.concat([X1, X2, X3, X4], axis=1)
    # transformer = sklearn.preprocessing.MinMaxScaler()
    # X = transformer.fit_transform(X[_feature_names])
    return X[_feature_names]
import hashlib
import os
import sys

utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
utils_dir = os.path.join(utils_dir, 'utils')
sys.path.append(utils_dir)

import util  # noqa: E402

parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

cfd = os.path.join(parent_path, 'data', 'intermediate', 'class_file_dirs')
wfd = os.path.join(parent_path, 'data', 'intermediate', 'write_file_dirs')
class_file_dirs = util.load_obj(cfd)
write_file_dirs = util.load_obj(wfd)

class_file_hashes = []
write_file_hashes = []

count = 0
for tup in class_file_dirs:
    if count % 100000 == 0:
        print('hashed %d class images' % count)
    (cclass, cfile) = tup
    file_path = os.path.join(parent_path, cfile)
    chash = hashlib.md5(open(file_path, 'rb').read()).hexdigest()
    class_file_hashes.append((cclass, cfile, chash))
# Created:     03/03/2015
# Copyright:   (c) Animesh 2015
# Licence:     <your licence>
# -------------------------------------------------------------------------------
from __future__ import division
import itertools, constants, getTextFromDoc, util, tokenizer, re
from collections import Counter
import os, json
from stemming.porter2 import stem
from datetime import datetime
from time import time
import termStats
import sys, shutil, StringIO

DOC_ID_MAP = util.load_obj('DOC_ID_MAP.pkl')
vocab = set()
v = open(os.path.join(constants.TEMP_DIR, 'vocab.dat'), "w")
doc_len_map = dict()
total_tokens = 0
no_of_docs = 0


def termPositions(lst, element):
    result = []
    offset = -1
    while True:
        try:
            offset = lst.index(element, offset + 1)
        except ValueError:
                fast_parameters.append(weight.fast)
            loss_all.append(F.l1_loss(y_qry[i], model(x_qry[i])).item())
    loss_all = np.array(loss_all)
    print('{}+/-{}'.format(np.mean(loss_all),
                           1.96 * np.std(loss_all, 0) / np.sqrt(len(loss_all))))


if __name__ == '__main__':
    args = parse_args()
    if not args.test:
        run(args, num_workers=1, log_interval=100, verbose=True, save_path=None)
    else:
        utils.set_seed(args.seed)
        code_root = os.path.dirname(os.path.realpath(__file__))
        mode_path = utils.get_path_from_args(args)
        mode_path = '9b8290dd3f63cbafcd141ba21282c783'
        path = '{}/{}_result_files/'.format(code_root, args.task) + mode_path
        logger = utils.load_obj(path)
        model = logger.valid_model[-1]
        dataloader_test = DataLoader(
            Metamovie(args, partition='test', test_way='old'),  # old, new_user, new_item, new_item_user
            batch_size=1,
            num_workers=args.num_workers)
        evaluate_test(args, model, dataloader_test)
        # --- settings ---
    returns:
    - 0 through 9 for classes representing respective numbers
    - 10 through 35 for classes representing respective uppercase letters
    - 36 through 61 for classes representing respective lowercase letters
    '''
    if c.isdigit() and int(c) < 40:
        return (int(c) - 30)
    elif int(c, 16) <= 90:  # uppercase
        return (int(c, 16) - 55)
    else:
        return (int(c, 16) - 61)


parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
by_writer_dir = os.path.join(parent_path, 'data', 'intermediate', 'images_by_writer')
writers = util.load_obj(by_writer_dir)

num_json = int(math.ceil(len(writers) / MAX_WRITERS))

users = [[] for _ in range(num_json)]
num_samples = [[] for _ in range(num_json)]
user_data = [{} for _ in range(num_json)]

writer_count = 0
json_index = 0
for (w, l) in writers:
    users[json_index].append(w)
    num_samples[json_index].append(len(l))
    user_data[json_index][w] = {'x': [], 'y': []}
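# Worked examples for the hexadecimal label mapping above (values follow directly from the
# three branches; the enclosing function's name is not visible in this fragment):
#   '30' -> int('30') < 40      -> 30 - 30 = 0   (digit '0')
#   '41' -> int('41', 16) = 65  -> 65 - 55 = 10  (uppercase 'A')
#   '61' -> int('61', 16) = 97  -> 97 - 61 = 36  (lowercase 'a')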
import os
import sys

utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
utils_dir = os.path.join(utils_dir, 'utils')
sys.path.append(utils_dir)

import util

parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

cfhd = os.path.join(parent_path, 'data', 'intermediate', 'class_file_hashes')
wfhd = os.path.join(parent_path, 'data', 'intermediate', 'write_file_hashes')
class_file_hashes = util.load_obj(cfhd)  # each elem is (class, file dir, hash)
write_file_hashes = util.load_obj(wfhd)  # each elem is (writer, file dir, hash)

class_hash_dict = {}
for i in range(len(class_file_hashes)):
    (c, f, h) = class_file_hashes[len(class_file_hashes) - i - 1]
    class_hash_dict[h] = (c, f)

write_classes = []
for tup in write_file_hashes:
    (w, f, h) = tup
    write_classes.append((w, f, class_hash_dict[h][0]))

wwcd = os.path.join(parent_path, 'data', 'intermediate', 'write_with_class')
util.save_obj(write_classes, wwcd)
        cnt = 0
        isTooFast = False
        isTooSlow = False
        for seq in stat[key]['Sequences']:
            duration = seq[1] - seq[0]
            distance = util.dis(seq[2], seq[4], seq[3], seq[5])
            if duration != 0:
                speed = distance / duration
            else:
                speed = 0
            if duration >= minSequenceDuration:
                cnt += 1
            if speed > maxSpeed:
                isTooFast = True
            if speed < minSpeed:
                isTooSlow = True
            # print(distance / 1000, duration / 1000 / 60, speed / 1000 * 1000 * 60 * 60)
        if cnt < minSequenceCnt:
            continue
        if isTooFast or isTooSlow:
            continue
        # decide to add the key
        filteredPlane[key] = 1
        longFlightCnt += 1
    print("remain planes cnt = ", longFlightCnt)


# filter planes
print('filter planes')
stat = util.load_obj(os.path.join(conf["output_folder"], 'stat'))
# print(stat)
filterPlanes(stat, filteredPlane)
with open(os.path.join(conf["output_folder"], 'filtered-plane.json'), 'w') as f:
    f.write(json.dumps(filteredPlane))
def train_network(model, game_state, observe=False):
    last_time = time.time()
    # store the previous observations in replay memory
    D = load_obj("D")  # load from file system
    # get the first state by doing nothing
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1  # 0 => do nothing, 1 => jump
    x_t, r_0, terminal = game_state.get_state(do_nothing)  # get next step after performing the action
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)  # stack 4 images to create placeholder input
    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])  # 1*20*40*4
    initial_state = s_t

    if observe:
        OBSERVE = 999999999  # keep observing, never train
        epsilon = FINAL_EPSILON
        print("Now we load weight")
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse', optimizer=adam)
        print("Weight load successfully")
    else:
        # training mode
        OBSERVE = OBSERVATION
        epsilon = load_obj("epsilon")
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse', optimizer=adam)

    t = load_obj("time")  # resume from the previous time step stored in the file system
    while True:  # endless running
        loss = 0
        Q_sa = 0
        action_index = 0
        a_t = np.zeros([ACTIONS])  # action at t
        # choose an action epsilon-greedily
        if t % FRAME_PER_ACTION == 0:  # parameter to skip frames for actions
            if random.random() <= epsilon:  # randomly explore an action
                print("----------Random Action----------")
                action_index = random.randrange(ACTIONS)
                a_t[0] = 1
            else:
                # predict the output
                q = model.predict(s_t)  # input a stack of 4 images, get the prediction
                max_Q = np.argmax(q)  # choose the index with the maximum q value
                action_index = max_Q
                a_t[action_index] = 1  # 0 => do nothing, 1 => jump

        # reduce epsilon (the exploration parameter) gradually
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        # run the selected action and observe the next state and reward
        x_t1, r_t, terminal = game_state.get_state(a_t)
        print('fps: {0}'.format(1 / (time.time() - last_time)))  # helpful for measuring frame rate
        last_time = time.time()
        x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1)  # 1x20x40x1
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)  # append the new image to the input stack and drop the oldest one

        # store the transition in D
        D.append((s_t, action_index, r_t, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        # only train if done observing
        if t > OBSERVE:
            # sample a mini_batch to train on
            mini_batch = random.sample(D, BATCH)
            inputs = np.zeros((BATCH, s_t.shape[1], s_t.shape[2], s_t.shape[3]))  # 32, 20, 40, 4
            targets = np.zeros((inputs.shape[0], ACTIONS))  # 32, 2

            # now we do the experience replay
            for i in range(0, len(mini_batch)):
                state_t = mini_batch[i][0]    # 4D stack of images
                action_t = mini_batch[i][1]   # action index
                reward_t = mini_batch[i][2]   # reward at state_t due to action_t
                state_t1 = mini_batch[i][3]   # next state
                terminal = mini_batch[i][4]   # whether the agent died or survived due to the action
                inputs[i:i + 1] = state_t
                targets[i] = model.predict(state_t)  # predicted q values
                Q_sa = model.predict(state_t1)  # predict q values for the next step
                if terminal:
                    targets[i, action_t] = reward_t  # if terminated, only equals reward
                else:
                    targets[i, action_t] = reward_t + GAMMA * np.max(Q_sa)
            loss += model.train_on_batch(inputs, targets)
            loss_df.loc[len(loss_df)] = loss
            q_values_df.loc[len(q_values_df)] = np.max(Q_sa)

        s_t = initial_state if terminal else s_t1  # reset game to initial frame if terminated
        t = t + 1

        # save progress every 1000 iterations
        if t % 1000 == 0:
            print("Now we save model")
            game_state._game.pause()  # pause game while saving to filesystem
            model.save_weights("model.h5", overwrite=True)
            save_obj(D, "D")  # saving episodes
            save_obj(t, "time")  # caching time steps
            save_obj(epsilon, "epsilon")  # cache epsilon to avoid repeated randomness in actions
            loss_df.to_csv("./objects/loss_df.csv", index=False)
            scores_df.to_csv("./objects/scores_df.csv", index=False)
            actions_df.to_csv("./objects/actions_df.csv", index=False)
            q_values_df.to_csv(q_value_file_path, index=False)
            with open("model.json", "w") as outfile:
                json.dump(model.to_json(), outfile)
            clear_output()
            game_state._game.resume()

        # print info
        if t <= OBSERVE:
            state = "observe"
        elif t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"
        print "TIMESTAMP", t, "/ STATE", state, "/ EPSILON", epsilon, "/ ACTION", \
            action_index, "/ REWARD", r_t, "/ Q_MAX ", np.max(Q_sa), "/ Loss ", loss