def __init__(self, filename):
    """Open *filename*, reset the reader state and parse the file.

    Both base initialisers (``FE`` and ``PostProcess``) run first.  The
    opened file object is handed to ``self.processFile`` as the last step.

    Raises:
        FEError: if the file cannot be opened (``IOError`` from ``open``).
    """
    FE.__init__(self)
    PostProcess.__init__(self)

    self.filename = filename
    try:
        handle = open(filename)
    except IOError:
        raise FEError("could not open file: '%s'" % filename)

    # Case name = base name of the file without directory and extension.
    self.casename, _extension = os.path.splitext(os.path.basename(filename))

    # Parser state, reset before the file is processed.
    self.analysis = None
    self.apar = {}
    self.step = 0
    self.includelevel = 0
    self.matidx = 1

    # Lookup tables, all empty until processFile fills them.
    self.nodemap = {}
    self.elmap = {}
    self.mat = {}
    self.nset = {}
    self.elset = {}

    # Private handle/step slots start out unset.
    self._postfh = None
    self._printfh = None
    self._filefh = None
    self._filestep = None

    self.processFile(handle)
def toggle_effect(self, *args, **kwargs):
    """
    override Effect.toggle_effect to copy shaders to group post_process

    The call is first forwarded to the parent implementation.  For a group
    that has active effects (or already owns a post_process object) the same
    toggle is then applied to the post_process object, which is created on
    first use.
    """
    super(Group, self).toggle_effect(*args, **kwargs)

    if not self.is_group:
        return
    if not (self.active_effects or self.post_process):
        return

    if not self.post_process:
        # Imported lazily, exactly where the object is first needed.
        from postprocess import PostProcess
        self.post_process = PostProcess(self.parent)
        self.post_process.set_visible(1)

    self.post_process.toggle_effect(*args, **kwargs)
def __init__(self, task_id, file_name, file_id, credentials, download_dir, listener):
    """Store the task parameters and build the three stage objects.

    Each stage (``EarthEngineStatus``, ``DriveDownload``, ``PostProcess``)
    receives this instance as its ``listener``; the ``listener`` argument
    itself is only stored on ``self.listener``.
    """
    self.task_id = task_id
    self.file_id = file_id
    self.credentials = credentials
    self.dir = download_dir
    self.listener = listener

    # Stage objects are created in the order status -> download -> post-process.
    self.earth_engine_status = EarthEngineStatus(
        task_id=task_id,
        listener=self,
    )
    self.drive_download = DriveDownload(
        credentials=self.credentials,
        file_name=file_name,
        file_id=self.file_id,
        download_dir=self.dir,
        listener=self,
    )
    self.post_process = PostProcess(
        file_name=file_name,
        download_dir=download_dir,
        listener=self,
    )

    # No step has been reported yet.
    self.current_step = None
def post_process(rec_prmtop, lig_prmtop, complex_prmtop, sampling_nc_file,
                 nr_resampled_complexes, sander_tmp_dir,
                 rec_pdb_out, lig_pdb_out, bpmf_pkl_out):
    """Build a ``PostProcess`` object and write its three output files.

    The constructor is called with the module-level ``SOLVENT_PHASES`` and
    ``TEMPERATURE`` constants and a literal ``False`` in the seventh position.
    Afterwards ``write_rececptor_pdb``, ``write_resampled_ligand_pdb`` and
    ``pickle_bpmf`` are invoked with the respective output paths.

    Always returns ``None``.
    """
    processor = PostProcess(
        rec_prmtop,
        lig_prmtop,
        complex_prmtop,
        sampling_nc_file,
        SOLVENT_PHASES,
        nr_resampled_complexes,
        False,
        TEMPERATURE,
        sander_tmp_dir,
    )

    # NOTE: "rececptor" is the spelling of the method as it is called here.
    processor.write_rececptor_pdb(rec_pdb_out)
    processor.write_resampled_ligand_pdb(lig_pdb_out)
    processor.pickle_bpmf(bpmf_pkl_out)
def main():
    """Run post-processing and evaluation on one batch of ground-truth masks.

    One batch is drawn from a ``TestDataGenerator``; for every sample the last
    guide mask is displayed and fed to ``PostProcess.process`` in place of a
    predicted mask, and the collected results are passed to ``Eval.evaluate``.
    """
    # build dataset
    batch_size = 1
    height = width = 48
    dataset = TestDataGenerator(PuppetDataset, 4, batch_size,
                                height=height, width=width)
    evaluate = Eval()
    postprocess = PostProcess(height, width)

    # generate and display
    image_group, guide_mask_group, annkp_group = dataset.next()
    outobjects_group = []
    for sample_idx in range(batch_size):
        image = image_group[sample_idx]
        # only the last mask level is used, the other levels are ignored
        last_mask = guide_mask_group[sample_idx][-1]
        display_my_masks(image, last_mask)
        # the ground-truth mask stands in for the predicted mask
        processed = postprocess.process(last_mask)
        outobjects_group.append(processed)

    evaluate.evaluate(annkp_group, outobjects_group)
def validate(model, test_data, golden_file, beam_size=8, alpha=0.6, max_time_step=100):
    """For development Only

    Decode ``test_data`` with ``generate_batch`` and score the post-processed
    output against references read from ``golden_file + '.input_clean'``.

    Args:
        model: passed through to ``generate_batch``.
        test_data: iterable of batches.
        golden_file: path prefix; ``.input_clean`` supplies the references
            (lines starting with ``# ::tokens ``, JSON token lists) and
            ``.preproc`` is read with ``read_file``.
        beam_size, alpha, max_time_step: passed through to ``generate_batch``.

    Returns:
        ``(bleu, chrf)`` where ``bleu`` is the ``.score`` of
        ``sacrebleu.corpus_bleu`` and ``chrf`` is whatever
        ``sacrebleu.corpus_chrf`` returns.

    Raises:
        AssertionError: if the number of decoded outputs differs from the
            number of references.
    """
    pp = PostProcess()
    token_prefix = '# ::tokens '

    # Read the references under a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    ref_stream = []
    with open(golden_file + '.input_clean') as clean_file:
        for line in clean_file:
            if line.startswith(token_prefix):
                tokens = json.loads(line[len(token_prefix):].strip())
                ref_stream.append(' '.join(tokens).lower())

    # gold model output
    graph, gold_sys_stream, _, abstract = read_file(golden_file + '.preproc')
    ref_streams = [ref_stream]

    sys_stream = []
    for batch in test_data:
        res = generate_batch(model, batch, beam_size, alpha, max_time_step)
        sys_stream.extend(res['token'])
    assert len(sys_stream) == len(ref_stream)

    sys_stream = [
        pp.post_process(o, abstract[i], graph[i])
        for i, o in enumerate(sys_stream)
    ]

    bleu = sacrebleu.corpus_bleu(sys_stream,
                                 ref_streams,
                                 force=True,
                                 lowercase=True,
                                 tokenize='none').score
    chrf = sacrebleu.corpus_chrf(sys_stream, ref_stream)
    return bleu, chrf
def validate(model, test_data, beam_size=8, alpha=0.6, max_time_step=100):
    """For development Only

    Decode every batch of ``test_data`` with ``generate_batch`` and score the
    post-processed output against the batches' own ``'target'`` entries.

    Args:
        model: passed through to ``generate_batch``.
        test_data: iterable of batches; each batch is indexed with
            ``'target'`` and its decoding result with ``'token'``.
        beam_size, alpha, max_time_step: passed through to ``generate_batch``.

    Returns:
        ``(bleu, chrf)`` where ``bleu`` is the ``.score`` of
        ``sacrebleu.corpus_bleu`` and ``chrf`` is whatever
        ``sacrebleu.corpus_chrf`` returns.

    Raises:
        AssertionError: if the number of decoded outputs differs from the
            number of targets.
    """
    pp = PostProcess()
    ref_stream = []
    sys_stream = []
    for batch in test_data:
        res = generate_batch(model, batch, beam_size, alpha, max_time_step)
        sys_stream.extend(res['token'])
        ref_stream.extend(batch['target'])
    assert len(sys_stream) == len(ref_stream)

    sys_stream = [pp.post_process(hyp) for hyp in sys_stream]
    # Join each reference's OWN tokens.  The previous comprehension iterated
    # with ``i`` but joined ``o``: a NameError on Python 3, and on Python 2 the
    # leaked loop variable of the comprehension above.
    ref_stream = [' '.join(ref_tokens) for ref_tokens in ref_stream]
    ref_streams = [ref_stream]

    bleu = sacrebleu.corpus_bleu(sys_stream,
                                 ref_streams,
                                 force=True,
                                 lowercase=False,
                                 tokenize='none').score
    chrf = sacrebleu.corpus_chrf(sys_stream, ref_stream)
    return bleu, chrf
class Download(object):
    """Coordinates the three stages of one download task.

    The instance registers itself as listener of an ``EarthEngineStatus``, a
    ``DriveDownload`` and a ``PostProcess`` object, forwards every status
    update to its own listener and starts/stops the stages as the reported
    ``'step'`` changes.
    """

    # Stage attributes in the order they are cancelled and stopped.
    _STAGES = ('earth_engine_status', 'drive_download', 'post_process')

    def __init__(self, task_id, file_name, file_id, credentials, download_dir, listener):
        """Store the task parameters and create the stage objects."""
        self.task_id = task_id
        self.file_id = file_id
        self.credentials = credentials
        self.dir = download_dir
        self.listener = listener
        self.earth_engine_status = EarthEngineStatus(
            task_id=task_id,
            listener=self,
        )
        self.drive_download = DriveDownload(
            credentials=self.credentials,
            file_name=file_name,
            file_id=self.file_id,
            download_dir=self.dir,
            listener=self,
        )
        self.post_process = PostProcess(
            file_name=file_name,
            download_dir=download_dir,
            listener=self,
        )
        self.current_step = None

    def update_status(self, status):
        """Forward *status* to the listener and react to step changes.

        ``'EXPORTED'`` hands over from status polling to the drive download,
        ``'DOWNLOADED'`` hands over from the download to post-processing.
        Any ``status['state']`` other than ``'ACTIVE'`` stops everything.
        """
        self.listener.update_status(self.task_id, status)

        new_step = status.get('step')
        previous_step = self.current_step
        self.current_step = new_step

        # Only act when a (truthy) step differs from the last one seen.
        if new_step and previous_step != new_step:
            if new_step == 'EXPORTED':
                self.earth_engine_status.stop()
                self.drive_download.start()
            elif new_step == 'DOWNLOADED':
                self.drive_download.stop()
                self.post_process.start()

        if status['state'] != 'ACTIVE':
            self.stop()

    def cancel(self):
        """Cancel every stage that is still present, then stop."""
        for attr in self._STAGES:
            stage = getattr(self, attr)
            if stage:
                stage.cancel()
        self.stop()

    def stop(self):
        """Stop every stage that is still present and drop the reference."""
        logging.debug('Stopping download of task ' + self.task_id)
        for attr in self._STAGES:
            stage = getattr(self, attr)
            if stage:
                stage.stop()
                setattr(self, attr, None)
class EvalPuppet(Callback):
    """Callback that evaluates the model on one generated batch per epoch."""

    def __init__(self, generator, height=48, width=48, batch_size=1):
        """Keep the data generator and build the post-process/eval helpers."""
        super(EvalPuppet, self).__init__()
        self.batch_size = batch_size
        self.generator = generator
        self.postprocess = PostProcess(height, width)
        self.evaluate = Eval()

    def on_epoch_end(self, epoch, logs=None):
        """Predict on a fresh batch, post-process each mask and evaluate.

        ``epoch`` and ``logs`` are accepted but not used.
        """
        # generate data (the guide masks of the batch are not needed here)
        image_group, _guide_mask_group, annkp_group = self.generator.next()

        # only the last entry of the prediction output is used
        predicted_masks = self.model.predict_on_batch(image_group)[-1]

        outobjects_group = [
            self.postprocess.process(predicted_masks[sample_idx, :, :, :])
            for sample_idx in range(self.batch_size)
        ]
        self.evaluate.evaluate(annkp_group, outobjects_group)
'user_hid_dim': uhid, 'user_input_length': userMaxLen, 'user_input_dim': embedding_dim } movieParams = { 'movie_hid_dim': mhid, 'movie_input_length': movieMaxLen, 'movie_input_dim': embedding_dim } neiParams = { 'nei_hid_dim': nhid, 'nei_input_length': neiMaxLen, 'nei_input_dim': embedding_dim } path = os.path.abspath('.') pst = PostProcess(path) # users = {} # for i in range(10): # users[i] = [np.random.rand(embedding_dim) for k in range(20)] # movies = {} # for j in range(15): # movies[j] = [np.random.rand(embedding_dim) for k in range(40)] # samples = [] # for i in range(10): # for j in range(15): # samples.append((i, j, float(np.random.randint(0, 5)))) # save(pst, samples, users, movies)
# Apply the command-line switches to the SyncDB class-level settings.
if args.rebuild:
    SyncDB.IS_REBUILD = (args.rebuild == 'Y')
if args.debug_node_ids:
    # Debugging specific nodes always switches rebuild mode off.
    SyncDB.DEBUG_NODE_IDS = args.debug_node_ids.split(',')
    SyncDB.IS_REBUILD = False
if args.source:
    SyncDB.TO_SYNC_DS = args.source.split(',')

cr = SyncDB.load_xml_file(args.config)

if not args.skipvalidation:
    if not cr.check_inputs():
        print("Some input files are missing or urls are broken!")
        SyncDB.ERROR_LOG.close()
        exit()
    if args.validate:
        # Validation-only run: stop before any work is done.
        print("Validation complete.")
        SyncDB.ERROR_LOG.close()
        exit()

cr.do_work()
if not args.skippostprocess:
    PostProcess.do_postprocess()

print("gp database was successfully built")
SyncDB.ERROR_LOG.close()
exit()
# from preprocess import preprocess
import os

import numpy as np

from adapter import Adapt
from attention import NeuralModel
from postprocess import PostProcess
from preprocess import preprocess
from recommend import CFUtil
from utils.utils import transform

if __name__ == "__main__":
    # Fixed seed so that runs are repeatable.
    np.random.seed(100)

    pst = PostProcess(os.path.abspath('.'))
    path = os.path.abspath('.')

    samples, users, movies = preprocess()
    # Only the first 20000 samples are kept.
    samples = samples[:20000]
    users, movies = transform(users), transform(movies)

    # Saving the prepared data (disabled):
    # pst.saveSamples(samples, 'samples.csv')
    # pst.saveReviews(users, 'users.csv')
    # pst.saveReviews(movies, 'movies.csv')

    # load data (disabled):
    # samples = pst.loadSamples('samples.csv')
    # users = pst.loadReviews('users.csv')
    # movies = pst.loadReviews('movies.csv')
class Group(object):
    """Mixin adding group behaviour: z-sorted children, sequences and a
    post_process object through which the group's effects are rendered."""

    def __init__(self, *args, **kwargs):
        """Set the group defaults, then continue along the MRO."""
        self.is_group = False
        self.children_need_sorting = False

        self.is_sequence = 0
        self.sequence_index = 0
        self.sequence_normal_index = 0

        # Created lazily by toggle_effect().
        self.post_process = None

        super(Group, self).__init__(*args, **kwargs)

    def draw(self, *args, **kwargs):
        """Draw the group; with active effects the children are captured
        into the post_process object, which is then drawn itself."""
        if not self.visible:
            return

        if self.children_need_sorting:
            self.children = sorted(self.children, key=lambda child: child.z(), reverse=True)
            self.children_need_sorting = False

        if self.is_sequence:
            self.set_sequence_index(self.sequence_index)

        if self.is_group and self.active_effects:
            # capture children.draw()
            self.post_process.capture_start()

        super(Group, self).draw(*args, **kwargs)

        if self.is_group and self.active_effects:
            post = self.post_process

            # stop capture
            post.capture_end()

            # copy shader uniforms
            post.unif[:] = self.unif[:]
            post.buf[0].unib[:] = self.buf[0].unib[:]
            post.unif_warp[:] = self.unif_warp[:]

            # draw, keeping the post_process object at the group's depth
            if self.pos_z != post.pos_z:
                post.position(post.pos_x, post.pos_y, self.pos_z)
            post.draw()

    def toggle_effect(self, *args, **kwargs):
        """
        override Effect.toggle_effect to copy shaders to group post_process
        """
        super(Group, self).toggle_effect(*args, **kwargs)

        if not self.is_group:
            return
        if not (self.active_effects or self.post_process):
            return

        if not self.post_process:
            # Imported lazily, exactly where the object is first needed.
            from postprocess import PostProcess
            self.post_process = PostProcess(self.parent)
            self.post_process.set_visible(1)

        self.post_process.toggle_effect(*args, **kwargs)

        # MASK is a bit tricky
        # Disabled because mask doesn't scale
        # if self.effect_mask != self.post_process.effect_mask:
        #     self.post_process.set_effect_mask(self.effect_mask)
        #     if len(self.buf[0].textures) == 2:
        #         del self.buf[0].textures[1]

        # same for warp
        # if self.warp != self.post_process.warp or (self.post_process.warp_1 != self.warp_1 or self.post_process.warp_2 != self.warp_2 or self.post_process.warp_3 != self.warp_3 or self.post_process.warp_4 != self.warp_4):
        #     self.post_process.warp_1 = self.warp_1
        #     self.post_process.warp_2 = self.warp_2
        #     self.post_process.warp_3 = self.warp_3
        #     self.post_process.warp_4 = self.warp_4
        #     self.post_process.toggle_warp_effect()

    @osc_property('sequence_mode', 'is_sequence')
    def set_sequence_mode(self, mode):
        """
        sequence mode (0=disabled, 1=enabled)
        """
        if not self.children:
            return
        self.is_sequence = int(bool(mode))

    @osc_property('sequence_index', 'sequence_index')
    def set_sequence_index(self, index):
        """
        currently visible child by index (z-sorted)
        """
        if not (self.is_sequence and self.children):
            return

        # Hide everything, then reveal the selected child (index wraps around).
        for child in self.children:
            child.set_visible(0)
        wrapped = int(index) % len(self.children)
        self.sequence_index = wrapped
        self.children[wrapped].set_visible(1)

    @osc_property('sequence_position', 'sequence_normal_index', shorthand=True)
    def set_sequence_normal_index(self, index):
        """
        relative sequence position, normalized index (0<>1)
        """
        if not self.children:
            return

        self.sequence_normal_index = max(0, min(float(index), 1))
        child_count = len(self.children)
        if self.sequence_normal_index == 1:
            # 1 maps to the last child instead of one past the end.
            target = child_count - 1
        else:
            target = int(self.sequence_normal_index * child_count)
        self.set_sequence_index(target)
# Maximum input lengths for the user, movie and neighbour inputs.
userMaxLen = 20
movieMaxLen = 40
neiMaxLen = 60
usingNeiModel = False

# mustn't modify the following parameters
sim_thresh = 0.5
embedding_dim = 5

# One [hidden dim, input length, input dim] triple per input kind.
attParamDic = dict(
    user=[uhid, userMaxLen, embedding_dim],
    movie=[mhid, movieMaxLen, embedding_dim],
    nei=[nhid, neiMaxLen, embedding_dim],
)

path = os.path.abspath('.')
pst = PostProcess(path)

# Random toy data for a quick experiment (disabled):
# users = {}
# for i in range(10):
#     users[i] = [np.random.rand(embedding_dim) for k in range(20)]
# movies = {}
# for j in range(15):
#     movies[j] = [np.random.rand(embedding_dim) for k in range(40)]
# samples = []
# for i in range(10):
#     for j in range(15):
#         samples.append((i, j, float(np.random.randint(0, 5))))
# save(pst, samples, users, movies)
from image import Image
from preprocess import Preprocess
from classifier import Classifier
from postprocess import PostProcess

genders = Image.genders()
all_data, ids = Image.all()

# Feature pipeline: matrix -> drop constants -> scale -> degree-2
# polynomial -> scale again -> plain Python lists.
matrix = Preprocess.to_matrix(all_data)
matrix = Preprocess.remove_constants(matrix)
matrix = Preprocess.scale(matrix)
matrix = Preprocess.polynomial(matrix, 2)
matrix = Preprocess.scale(matrix)
matrix = matrix.tolist()

# The first 1128 rows are used for training, the remainder for testing.
split_at = 1128
train, test = matrix[:split_at], matrix[split_at:]
test_ids = ids[split_at:]

# Report the sizes of every collection, one per line.
for collection in (train, test, test_ids, ids, matrix):
    print(len(collection))

preds = Classifier.ensemble_preds(train, genders, test)  # real
# preds = Classifier.ensemble_preds(train, genders, train)  # fake

# for creating submission file
PostProcess.submission(test_ids, preds)
def get_imp_keywords_for_year(self, year):
    """Return the per-document keyword data of *year* plus its common words.

    The first element is passed through unchanged from
    ``self.get_keywords_for_year(year)``; the second is the result of
    ``PostProcess.find_common_words`` on the year's word list with ``1000``
    as second argument.
    """
    docs_word_tuples, words_of_year = self.get_keywords_for_year(year)
    common_words = PostProcess().find_common_words(words_of_year, 1000)
    return docs_word_tuples, common_words
def main():
    """Command-line entry point of the pipeline.

    Depending on the parsed arguments the function runs, in this order:
    folder/logging setup, formalisation of the raw train/test files, label
    and class-weight extraction (Step0), optional submission generation,
    tweet preprocessing and feature extraction (Step1), optional regression
    training, cross-validation (Step2), prediction on the test data (Step3)
    and an optional ensemble (Step4).

    Several branches end the program with ``quit()``.

    NOTE(review): the block nesting below was reconstructed from a source
    without indentation; in particular the placement of the
    ``if args.train_regression`` section inside Step3 should be confirmed.
    """
    args = get_arguments()
    random.seed(args.random_seed)
    np.random.seed(args.random_seed)  # sklearn use np to generate random value

    # Create folders and set logging format
    args.model_dir = os.path.join(args.out_dir, 'ckpt-{}'.format(args.class_weight_scheme))
    args.log_dir = os.path.join(args.out_dir, 'log')
    args.ensemble_dir = os.path.join(args.out_dir, 'ensemble-{}'.format(args.class_weight_scheme))
    if args.class_weight_scheme == 'customize':
        # The customized scheme gets one sub-folder per additional weight.
        args.model_dir = os.path.join(args.model_dir, 'weight{}'.format(args.additional_weight))
        args.ensemble_dir = os.path.join(args.ensemble_dir, 'weight{}'.format(args.additional_weight))
    prepare_folders(args)
    logger = set_logging(args)
    logger.info("Here is the arguments of this running:")
    logger.info("{}".format(args))
    utils.check_args_conflict(args)

    # Set files which contain data for training and test. If use "trecis2019-A", it means we want to tune parameters.
    args.data_prefix = "trecis2019-B"
    # Note that for 2019-B submission, all '2019' means '2019-B' and '2018' means '2018 + 2019-A'
    label_file = os.path.join(args.data_dir, 'ITR-H.types.v{}.json'.format(
        4 if args.data_prefix == "trecis2019-B" else 3))
    tweet_file_list = [os.path.join(args.data_dir, 'all-tweets.txt')]
    tweet_file_list_2019 = [os.path.join(args.data_dir, 'all-tweets-2019.txt')]
    train_file_list = [os.path.join(args.data_dir, 'TRECIS-CTIT-H-Training.json')]
    train_file_list += [os.path.join(args.data_dir, 'TRECIS-2018-TestEvents-Labels', 'assr{}.test'.format(i))
                        for i in range(1, 7)]
    if args.data_prefix == "trecis2019-B":
        # For the 2019-B run the 2019-A label files are added to the training files.
        train_file_list += [os.path.join(args.data_dir, '2019ALabels', '2019A-assr{}.json'.format(i))
                            for i in range(1, 6)]
        train_file_list += [os.path.join(args.data_dir, '2019ALabels', '2019-assr2.json')]
    test_raw_tweets_json_folder = 'download_tweets'

    # Some output files which have been formalized for further usage.
    formal_train_file = os.path.join(args.data_dir, 'train.txt{}'.format('_small' if args.sanity_check else ''))
    # NOTE(review): the '{}' placeholder is never formatted in this function, so
    # the path literally ends in 'test.txt{}' unless a callee fills it in - confirm.
    formal_test_file = os.path.join(args.data_dir, 'test.txt{}')
    tweet_text_out_file = os.path.join(args.out_dir, 'tweets-clean-text.txt')
    tweet_id_out_file = os.path.join(args.out_dir, 'tweets-id.txt')
    tweet_text_out_file_2019 = os.path.join(args.out_dir, 'tweets-clean-text-2019.txt')
    tweet_id_out_file_2019 = os.path.join(args.out_dir, 'tweets-id-2019.txt')
    predict_priority_score_out_file = os.path.join(args.out_dir, 'predict_priority_score.txt')

    # Set files for submission.
    args.model_name = '{0}{1}'.format(args.model, '-event' if args.event_wise else '')
    args.dev_label_file = os.path.join(args.ensemble_dir, 'dev_label.txt')
    args.dev_predict_file = os.path.join(args.ensemble_dir, 'dev_predict_{}.txt'.format(args.model_name))
    args.test_predict_file = os.path.join(args.ensemble_dir, 'test_predict_{}.txt'.format(args.model_name))
    args.submission_folder = utils.prepare_submission_folder(args)
    args.submission_file = os.path.join(args.submission_folder, 'submission_{}'.format(args.model_name))

    # As the original files provided by TREC are quite messy, we formalize them into train and test file.
    utils.formalize_files(train_file_list, formal_train_file, args)
    utils.formalize_test_file(test_raw_tweets_json_folder, formal_test_file, prefix=args.data_prefix)
    logger.info("The training data file is {0} and testing data file is {1}".format(
        formal_train_file, formal_test_file))

    # Step0. Extract some info which can be used later (also useful for generating submission files).
    label2id, majority_label, short2long_label = utils.get_label2id(label_file, formal_train_file, args.cv_num)
    id2label = utils.get_id2label(label2id)
    class_weight = utils.get_class_weight(args, label2id, id2label, formal_train_file)

    # When get submission, there is no need to run all following steps, but only read the `test_predict_file` and
    # pick some classes as final output according to policy (such as top-2 or auto-threshold).
    # You MUST run `--predict_mode` in advance to get the `test_predict_file` prepared.
    if args.get_submission:
        postpro = PostProcess(args, label2id, id2label, class_weight, majority_label, short2long_label,
                              formal_train_file, formal_test_file, test_raw_tweets_json_folder,
                              predict_priority_score_out_file)
        postpro.pick_labels_and_write_final_result()
        # NOTE(review): quit() is provided by the site module; sys.exit() is the
        # usual choice in programs - left unchanged here.
        quit()

    # Step1. Preprocess and extract features for all tweets
    tweetid_list, tweet_content_list = utils.get_tweetid_content(tweet_file_list)
    utils.write_tweet_and_ids(tweetid_list, tweet_content_list, tweet_text_out_file, tweet_id_out_file)
    tweetid_list_2019, tweet_content_list_2019 = utils.get_tweetid_content(tweet_file_list_2019)
    utils.write_tweet_and_ids(tweetid_list_2019, tweet_content_list_2019,
                              tweet_text_out_file_2019, tweet_id_out_file_2019)
    # Note that before `extract_features()`, we should manually run the `extract_features.sh` in `feature_tools`.
    # quit()  # The `extract_features.sh` only need to be run once for the same dataset.
    preprocess = Preprocess(args, tweetid_list, tweet_content_list, label2id, tweet_id_out_file)
    preprocess.extract_features()
    preprocess_2019 = Preprocess(args, tweetid_list_2019, tweet_content_list_2019, label2id,
                                 tweet_id_out_file_2019, test=True)
    preprocess_2019.extract_features()

    if args.train_regression:
        # The regression trainer is built here and reused in Step3 below.
        data_x, data_score = preprocess.extract_train_data(formal_train_file, get_score=True)
        train_regression = TrainRegression(args, data_x, data_score)
        if args.cross_validate:
            train_regression.cross_validate()
            quit()

    if args.cross_validate:
        # Step2. Train and Cross-validation (for tuning hyper-parameters).
        # If we want to do ensemble in the future, we need the prediction on dev data by setting `--cross_validate`.
        if args.event_wise:
            data_x, data_y, event2idx_list, line_num = preprocess.extract_train_data(formal_train_file)
            # One row per training line, one column per label.
            data_predict_collect = np.zeros([line_num, len(label2id)])
            metrics_collect = []
            metric_names = None
            for event_type in utils.idx2event_type:
                it_data_x, it_data_y = data_x[event_type], data_y[event_type]
                train = Train(args, it_data_x, it_data_y, id2label, preprocess.feature_len, class_weight, event_type)
                metrics, predict_score = train.train()
                # Scatter the per-event predictions back to their original line positions.
                for i, idx in enumerate(event2idx_list[event_type]):
                    data_predict_collect[idx] = predict_score[i]
                metrics_collect.append((metrics, it_data_x.shape[0]))
                if metric_names is None:
                    metric_names = train.metric_names
            utils.get_final_metrics(metrics_collect, metric_names)
        else:
            data_x, data_y = preprocess.extract_train_data(formal_train_file)
            train = Train(args, data_x, data_y, id2label, preprocess.feature_len, class_weight)
            _, data_predict_collect = train.train()
        if args.predict_mode:
            utils.write_predict_and_label(args, formal_train_file, label2id, data_predict_collect)

    if args.predict_mode:
        # Step3. Get the 2019 test data, and retrain the model on all training data, then predict on the 2019-test
        if args.event_wise:
            data_x, data_y, _, _ = preprocess.extract_train_data(formal_train_file)
            test_x, event2idx_list, line_num = preprocess_2019.extract_formalized_test_data(formal_test_file)
            test_predict_collect = np.zeros([line_num, len(label2id)])
            for event_type in utils.idx2event_type:
                it_data_x, it_data_y, it_test_x = data_x[event_type], data_y[event_type], test_x[event_type]
                if len(it_test_x) == 0:
                    print("[WARNING] There are no event belongs to {} for the test data".format(event_type))
                    continue
                train = Train(args, it_data_x, it_data_y, id2label, preprocess_2019.feature_len, class_weight,
                              event_type)
                train.train_on_all()
                predict_score = train.predict_on_test(it_test_x)
                # Scatter the per-event predictions back to their original line positions.
                for i, idx in enumerate(event2idx_list[event_type]):
                    test_predict_collect[idx] = predict_score[i]
        else:
            data_x, data_y = preprocess.extract_train_data(formal_train_file)
            test_x = preprocess_2019.extract_formalized_test_data(formal_test_file)
            train = Train(args, data_x, data_y, id2label, preprocess_2019.feature_len, class_weight)
            train.train_on_all()
            test_predict_collect = train.predict_on_test(test_x)
        utils.write_predict_res_to_file(args, test_predict_collect)

        if args.train_regression:
            test_x = preprocess_2019.extract_formalized_test_data(formal_test_file)
            if args.event_wise:
                # For event_wise setting, there will be many additional things extracted, what we need is only test_x.
                test_x = test_x[0]
            train_regression.train()
            predict_priority_score = train_regression.predict_on_test(test_x)
            utils.write_predict_score_to_file(predict_priority_score, predict_priority_score_out_file)

    if args.ensemble is not None:
        # TODO(junpeiz): Average the priority score for ensemble.
        # Step4 (optional). Do the ensemble of different model
        if args.event_wise:
            raise NotImplementedError("We don't want to ensemble for event-wise models")
        else:
            out_file = os.path.join(args.out_dir, 'ensemble_out.txt')
            # Note the file list contains predictions from all models with and without the '-event' suffix.
            # So, we need to train both event-wise and not event-wise models or just delete those files in the folder.
            dev_predict_file_list = utils.get_predict_file_list(args.ensemble_dir, 'dev_predict_')
            test_predict_file_list = utils.get_predict_file_list(args.ensemble_dir, 'test_predict_')
            train_x = utils.get_ensemble_feature(dev_predict_file_list)
            train_y = utils.get_ensemble_label(args.dev_label_file)
            print("The shape of ensemble train_x is {0}".format(train_x.shape))
            # NOTE(review): `train` is only bound when Step2 or Step3 ran in this
            # invocation; otherwise `train.mlb` raises NameError - confirm intended usage.
            utils.ensemble_cross_validate(train_x, train_y, id2label, train.mlb, args.ensemble)
            test_x = utils.get_ensemble_feature(test_predict_file_list)
            predict = utils.ensemble_train_and_predict(train_x, train.mlb.transform(train_y), test_x,
                                                       id2label, args.ensemble)
            predict = [id2label[x] for x in predict]
            with open(out_file, 'w', encoding='utf8') as f:
                for it_predict in predict:
                    f.write("{}\n".format(it_predict))
            print("The ensemble result has been written to {}".format(out_file))
def __init__(self, generator, height=48, width=48, batch_size=1):
    """Keep the data generator and build the post-process/eval helpers.

    ``height`` and ``width`` are forwarded to ``PostProcess``; ``batch_size``
    is stored on ``self.batch_size``.
    """
    super(EvalPuppet, self).__init__()

    # plain configuration
    self.batch_size = batch_size
    self.generator = generator

    # helper objects used after each epoch
    self.postprocess = PostProcess(height, width)
    self.evaluate = Eval()