Example #1
File: parser.py Project: FredDavison/feapy
    def __init__(self, filename):
        FE.__init__(self)
        PostProcess.__init__(self)

        self.filename = filename

        try:
            fh = open(filename)
        except IOError:
            raise FEError("could not open file: '%s'" % filename)

        filename = os.path.split(filename)[-1]
        self.casename = os.path.splitext(filename)[0]

        self.analysis = None
        self.apar = {}
        self.step = 0
        self.includelevel = 0
        self.matidx = 1
        self.nodemap = {}
        self.elmap = {}
        self.mat = {}
        self.nset = {}
        self.elset = {}

        self._postfh = None
        self._printfh = None
        self._filefh = None
        self._filestep = None

        self.processFile(fh)
Example #2
File: parser.py Project: Kleissl/feapy
    def __init__(self, filename):
        FE.__init__(self)
        PostProcess.__init__(self)

        self.filename = filename

        try:
            fh = open(filename)
        except IOError:
            raise FEError("could not open file: '%s'" % filename)

        filename = os.path.split(filename)[-1]
        self.casename = os.path.splitext(filename)[0]

        self.analysis = None
        self.apar = {}
        self.step = 0
        self.includelevel = 0
        self.matidx = 1
        self.nodemap = {}
        self.elmap = {}
        self.mat = {}
        self.nset = {}
        self.elset = {}

        self._postfh = None
        self._printfh = None
        self._filefh = None
        self._filestep = None

        self.processFile(fh)
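In examples #1 and #2 the case name is derived from the input path with os.path; a minimal sketch of that derivation, using a hypothetical path rather than one from the project:

import os

filename = '/home/user/models/beam.inp'   # hypothetical input file
base = os.path.split(filename)[-1]        # 'beam.inp'
casename = os.path.splitext(base)[0]      # 'beam'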
Example #3
    def toggle_effect(self, *args, **kwargs):
        """
        override Effect.toggle_effect to copy shaders to group post_process
        """
        super(Group, self).toggle_effect(*args, **kwargs)

        if self.is_group and (self.active_effects or self.post_process):

            if not self.post_process:
                from postprocess import PostProcess
                self.post_process = PostProcess(self.parent)
                self.post_process.set_visible(1)

            self.post_process.toggle_effect(*args, **kwargs)
Example #4
    def __init__(self, task_id, file_name, file_id, credentials, download_dir,
                 listener):
        self.task_id = task_id
        self.file_id = file_id
        self.credentials = credentials
        self.dir = download_dir
        self.listener = listener
        self.earth_engine_status = EarthEngineStatus(task_id=task_id,
                                                     listener=self)
        self.drive_download = DriveDownload(credentials=self.credentials,
                                            file_name=file_name,
                                            file_id=self.file_id,
                                            download_dir=self.dir,
                                            listener=self)
        self.post_process = PostProcess(file_name=file_name,
                                        download_dir=download_dir,
                                        listener=self)
        self.current_step = None
Example #5
def post_process(rec_prmtop, lig_prmtop, complex_prmtop, sampling_nc_file,
                 nr_resampled_complexes, sander_tmp_dir, rec_pdb_out,
                 lig_pdb_out, bpmf_pkl_out):
    post_pro = PostProcess(rec_prmtop, lig_prmtop, complex_prmtop,
                           sampling_nc_file, SOLVENT_PHASES,
                           nr_resampled_complexes, False, TEMPERATURE,
                           sander_tmp_dir)
    post_pro.write_rececptor_pdb(rec_pdb_out)
    post_pro.write_resampled_ligand_pdb(lig_pdb_out)
    post_pro.pickle_bpmf(bpmf_pkl_out)
    return None
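A minimal call sketch for the helper above, with hypothetical file paths and a hypothetical resampling count (the real values depend on the project's data layout):

post_process('receptor.prmtop', 'ligand.prmtop', 'complex.prmtop',
             'sampling.nc', 500, '/tmp/sander',
             'receptor_out.pdb', 'ligand_out.pdb', 'bpmf.pkl')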
Example #6
def main():
    # build dataset
    batch_size = 1
    height = 48
    width = 48
    dataset = TestDataGenerator(PuppetDataset, 4, batch_size, height=height, width=width)
    evaluate = Eval()
    postprocess = PostProcess(48, 48)
    # generate and display
    image_group, guide_mask_group, annkp_group = dataset.next()
    outobjects_group = []
    for x in range(batch_size):
        image = image_group[x]
        # select only the last-level mask; ignore the other levels
        mask = guide_mask_group[x][-1]
        display_my_masks(image, mask)
        # use the ground-truth mask directly as the predicted mask for post-processing
        outobjects_group.append(postprocess.process(mask))
    evaluate.evaluate(annkp_group, outobjects_group)
Example #7
def validate(model,
             test_data,
             golden_file,
             beam_size=8,
             alpha=0.6,
             max_time_step=100):
    """For development Only"""
    pp = PostProcess()

    ref_stream = []
    for line in open(golden_file + '.input_clean'):
        if line.startswith('# ::tokens '):
            o = json.loads(line[len('# ::tokens '):].strip())
            ref_stream.append(' '.join(o).lower())
    # gold model output
    graph, gold_sys_stream, _, abstract = read_file(golden_file + '.preproc')
    ref_streams = [ref_stream]

    sys_stream = []
    for batch in test_data:
        res = generate_batch(model, batch, beam_size, alpha, max_time_step)
        sys_stream.extend(res['token'])

    assert len(sys_stream) == len(ref_stream)
    sys_stream = [
        pp.post_process(o, abstract[i], graph[i])
        for i, o in enumerate(sys_stream)
    ]

    bleu = sacrebleu.corpus_bleu(sys_stream,
                                 ref_streams,
                                 force=True,
                                 lowercase=True,
                                 tokenize='none').score
    chrf = sacrebleu.corpus_chrf(sys_stream, ref_stream)

    return bleu, chrf
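For reference, sacrebleu.corpus_bleu expects a flat list of hypothesis strings plus a list of reference streams (hence ref_streams = [ref_stream] above); a shape sketch with made-up sentences:

import sacrebleu

sys_stream = ['the cat sat on the mat', 'hello world']
ref_stream = ['the cat sat on a mat', 'hello world']
bleu = sacrebleu.corpus_bleu(sys_stream, [ref_stream],
                             force=True, lowercase=True, tokenize='none').score
# Note: newer sacrebleu releases also expect a list of reference streams for
# corpus_chrf and return a score object rather than a float, so the corpus_chrf
# call above may need adjusting depending on the installed version.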
Example #8
File: work.py Project: youngflyasd/gtos
def validate(model, test_data, beam_size=8, alpha=0.6, max_time_step=100):
    """For development Only"""
    pp = PostProcess()

    ref_stream = []
    sys_stream = []
    for batch in test_data:
        res = generate_batch(model, batch, beam_size, alpha, max_time_step)
        sys_stream.extend(res['token'])
        ref_stream.extend(batch['target'])

    assert len(sys_stream) == len(ref_stream)
    sys_stream = [pp.post_process(o) for o in sys_stream]
    ref_stream = [' '.join(o) for o in ref_stream]
    ref_streams = [ref_stream]

    bleu = sacrebleu.corpus_bleu(sys_stream,
                                 ref_streams,
                                 force=True,
                                 lowercase=False,
                                 tokenize='none').score
    chrf = sacrebleu.corpus_chrf(sys_stream, ref_stream)

    return bleu, chrf
Example #9
class Download(object):
    def __init__(self, task_id, file_name, file_id, credentials, download_dir,
                 listener):
        self.task_id = task_id
        self.file_id = file_id
        self.credentials = credentials
        self.dir = download_dir
        self.listener = listener
        self.earth_engine_status = EarthEngineStatus(task_id=task_id,
                                                     listener=self)
        self.drive_download = DriveDownload(credentials=self.credentials,
                                            file_name=file_name,
                                            file_id=self.file_id,
                                            download_dir=self.dir,
                                            listener=self)
        self.post_process = PostProcess(file_name=file_name,
                                        download_dir=download_dir,
                                        listener=self)
        self.current_step = None

    def update_status(self, status):
        self.listener.update_status(self.task_id, status)
        step = status['step'] if 'step' in status else None
        step_taken = step and self.current_step != step
        self.current_step = step
        if step_taken:
            if step == 'EXPORTED':
                self.earth_engine_status.stop()
                self.drive_download.start()
            elif step == 'DOWNLOADED':
                self.drive_download.stop()
                self.post_process.start()
        if status['state'] != 'ACTIVE':
            self.stop()

    def cancel(self):
        if self.earth_engine_status:
            self.earth_engine_status.cancel()
        if self.drive_download:
            self.drive_download.cancel()
        if self.post_process:
            self.post_process.cancel()
        self.stop()

    def stop(self):
        logging.debug('Stopping download of task ' + self.task_id)
        if self.earth_engine_status:
            self.earth_engine_status.stop()
            self.earth_engine_status = None
        if self.drive_download:
            self.drive_download.stop()
            self.drive_download = None
        if self.post_process:
            self.post_process.stop()
            self.post_process = None
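The listener passed to Download only needs to expose update_status(task_id, status), as inferred from the calls above; a minimal hypothetical implementation:

class PrintListener(object):
    def update_status(self, task_id, status):
        # status is the dict forwarded by Download.update_status
        print('task %s -> %s' % (task_id, status.get('state')))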
Example #10
class EvalPuppet(Callback):
    def __init__(self, generator, height=48, width=48, batch_size=1):
        super(EvalPuppet, self).__init__()
        self.batch_size = batch_size
        self.generator = generator
        self.postprocess = PostProcess(height, width)
        self.evaluate = Eval()

    def on_epoch_end(self, epoch, logs=None):
        # generate data
        image_group, guide_mask_group, annkp_group = self.generator.next()
        predict_mask_group = self.model.predict_on_batch(image_group)[-1]
        outobjects_group = []
        for x in range(self.batch_size):
            # select only the last-level mask; ignore the other levels
            mask = predict_mask_group[x, :, :, :]
            outobjects_group.append(self.postprocess.process(mask))
        self.evaluate.evaluate(annkp_group, outobjects_group)
Example #11
    'user_hid_dim': uhid,
    'user_input_length': userMaxLen,
    'user_input_dim': embedding_dim
}
movieParams = {
    'movie_hid_dim': mhid,
    'movie_input_length': movieMaxLen,
    'movie_input_dim': embedding_dim
}
neiParams = {
    'nei_hid_dim': nhid,
    'nei_input_length': neiMaxLen,
    'nei_input_dim': embedding_dim
}
path = os.path.abspath('.')
pst = PostProcess(path)

# users = {}
# for i in range(10):
#     users[i] = [np.random.rand(embedding_dim) for k in range(20)]

# movies = {}
# for j in range(15):
#     movies[j] = [np.random.rand(embedding_dim) for k in range(40)]

# samples = []
# for i in range(10):
#     for j in range(15):
#         samples.append((i, j, float(np.random.randint(0, 5))))
# save(pst, samples, users, movies)
Example #12
File: gp.py Project: fossilqq/Metascape
    if args.rebuild:
        SyncDB.IS_REBUILD = args.rebuild == 'Y'

    if args.debug_node_ids:
        SyncDB.DEBUG_NODE_IDS = args.debug_node_ids.split(',')
        SyncDB.IS_REBUILD = False

    if args.source:
        SyncDB.TO_SYNC_DS = args.source.split(',')

    cr = SyncDB.load_xml_file(args.config)
    if not args.skipvalidation and not cr.check_inputs():
        print "Some input files are missing or urls are broken!"
        SyncDB.ERROR_LOG.close()
        exit()

    if args.validate and not args.skipvalidation:
        print "Validation complete."
        SyncDB.ERROR_LOG.close()
        exit()

    cr.do_work()

    if not args.skippostprocess:
        PostProcess.do_postprocess()

    print "gp database was successfully built"
    SyncDB.ERROR_LOG.close()
    exit()
Example #13
# from preprocess import preprocess
import os

import numpy as np

from adapter import Adapt
from attention import NeuralModel
from postprocess import PostProcess
from preprocess import preprocess
from recommend import CFUtil
from utils.utils import transform

if __name__ == "__main__":
    np.random.seed(100)
    path = os.path.abspath('.')
    pst = PostProcess(path)

    samples, users, movies = preprocess()
    samples = samples[0:20000]
    users = transform(users)
    movies = transform(movies)

    # pst.saveSamples(samples, 'samples.csv')
    # pst.saveReviews(users, 'users.csv')
    # pst.saveReviews(movies, 'movies.csv')

    # load data
    # samples = pst.loadSamples('samples.csv')
    # users = pst.loadReviews('users.csv')
    # movies = pst.loadReviews('movies.csv')
Example #14
class Group(object):

    def __init__(self, *args, **kwargs):

        self.is_group = False
        self.children_need_sorting = False

        self.is_sequence = 0
        self.sequence_index = 0
        self.sequence_normal_index = 0

        self.post_process = None

        super(Group, self).__init__(*args, **kwargs)

    def draw(self, *args, **kwargs):

        if self.visible:

            if self.children_need_sorting:
                self.children = sorted(self.children, key=lambda slide: slide.z(), reverse=True)
                self.children_need_sorting = False

                if self.is_sequence:
                    self.set_sequence_index(self.sequence_index)

            if self.is_group and self.active_effects:
                # capture children.draw()
                self.post_process.capture_start()

            super(Group, self).draw(*args, **kwargs)

            if self.is_group and self.active_effects:
                # stop capture
                self.post_process.capture_end()
                # copy shader uniforms
                self.post_process.unif[:] = self.unif[:]
                self.post_process.buf[0].unib[:] = self.buf[0].unib[:]
                self.post_process.unif_warp[:] = self.unif_warp[:]
                # draw
                if self.pos_z != self.post_process.pos_z:
                    self.post_process.position(self.post_process.pos_x, self.post_process.pos_y, self.pos_z)
                self.post_process.draw()

    def toggle_effect(self, *args, **kwargs):
        """
        override Effect.toggle_effect to copy shaders to group post_process
        """
        super(Group, self).toggle_effect(*args, **kwargs)

        if self.is_group and (self.active_effects or self.post_process):

            if not self.post_process:
                from postprocess import PostProcess
                self.post_process = PostProcess(self.parent)
                self.post_process.set_visible(1)

            self.post_process.toggle_effect(*args, **kwargs)

            # MASK is a bit tricky
            # Disabled because mask doesn't scale
            # if self.effect_mask != self.post_process.effect_mask:
            #     self.post_process.set_effect_mask(self.effect_mask)
            # if len(self.buf[0].textures) == 2:
            #     del self.buf[0].textures[1]

            # same for warp
            # if self.warp != self.post_process.warp or (self.post_process.warp_1 != self.warp_1 or self.post_process.warp_2 != self.warp_2 or self.post_process.warp_3 != self.warp_3 or self.post_process.warp_4 != self.warp_4):
            #     self.post_process.warp_1 = self.warp_1
            #     self.post_process.warp_2 = self.warp_2
            #     self.post_process.warp_3 = self.warp_3
            #     self.post_process.warp_4 = self.warp_4
            #     self.post_process.toggle_warp_effect()

    @osc_property('sequence_mode', 'is_sequence')
    def set_sequence_mode(self, mode):
        """
        sequence mode (0=disabled, 1=enabled)
        """
        if self.children:
            self.is_sequence = int(bool(mode))

    @osc_property('sequence_index', 'sequence_index')
    def set_sequence_index(self, index):
        """
        currently visible child by index (z-sorted)
        """
        if self.is_sequence and self.children:
            for c in self.children:
                c.set_visible(0)
            index = int(index) % len(self.children)
            self.sequence_index = index
            self.children[index].set_visible(1)

    @osc_property('sequence_position', 'sequence_normal_index', shorthand=True)
    def set_sequence_normal_index(self, index):
        """
        relative sequence position, normalized index (0<>1)
        """
        if self.children:
            self.sequence_normal_index = max(0, min(float(index), 1))
            if self.sequence_normal_index == 1:
                self.set_sequence_index(len(self.children) - 1)
            else:
                self.set_sequence_index(int(self.sequence_normal_index * len(self.children)))
Example #15
userMaxLen = 20
movieMaxLen = 40
neiMaxLen = 60
usingNeiModel = False

# do not modify the following parameters

sim_thresh = 0.5
embedding_dim = 5
attParamDic = {
    'user': [uhid, userMaxLen, embedding_dim],
    'movie': [mhid, movieMaxLen, embedding_dim],
    'nei': [nhid, neiMaxLen, embedding_dim]
}
path = os.path.abspath('.')
pst = PostProcess(path)

# users = {}
# for i in range(10):
#     users[i] = [np.random.rand(embedding_dim) for k in range(20)]

# movies = {}
# for j in range(15):
#     movies[j] = [np.random.rand(embedding_dim) for k in range(40)]

# samples = []
# for i in range(10):
#     for j in range(15):
#         samples.append((i, j, float(np.random.randint(0, 5))))
# save(pst, samples, users, movies)
Example #16
from image import Image
from preprocess import Preprocess
from classifier import Classifier
from postprocess import PostProcess

genders = Image.genders()
all_data, ids = Image.all()
matrix = Preprocess.to_matrix(all_data)
matrix = Preprocess.remove_constants(matrix)
matrix = Preprocess.scale(matrix)
matrix = Preprocess.polynomial(matrix, 2)
matrix = Preprocess.scale(matrix)
matrix = matrix.tolist()
train = matrix[:1128]
test = matrix[1128:]
test_ids = ids[1128:]
print len(train)
print len(test)
print len(test_ids)
print len(ids)
print len(matrix)
preds = Classifier.ensemble_preds(train, genders, test)  # real
# preds = Classifier.ensemble_preds(train, genders, train) # fake

# for creating submission file
PostProcess.submission(test_ids, preds)
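The Preprocess chain in this example (scale, degree-2 polynomial expansion, scale again) can be sketched with scikit-learn equivalents; this is an illustration under assumed data, not the project's own Preprocess implementation:

import numpy as np
from sklearn.preprocessing import StandardScaler, PolynomialFeatures

X = np.random.rand(10, 4)                           # stand-in for the image feature matrix
X = StandardScaler().fit_transform(X)               # Preprocess.scale
X = PolynomialFeatures(degree=2).fit_transform(X)   # Preprocess.polynomial(matrix, 2)
X = StandardScaler().fit_transform(X)               # Preprocess.scale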
Example #17
    def get_imp_keywords_for_year(self, year):
        year_docs_word_tuple_dict, year_words_list = self.get_keywords_for_year(year)
        postProcess = PostProcess()
        return year_docs_word_tuple_dict, postProcess.find_common_words(year_words_list, 1000)
Example #18
File: main.py Project: berniebear/trec_is
def main():
    args = get_arguments()
    random.seed(args.random_seed)
    np.random.seed(args.random_seed)  # sklearn use np to generate random value

    # Create folders and set logging format
    args.model_dir = os.path.join(args.out_dir, 'ckpt-{}'.format(args.class_weight_scheme))
    args.log_dir = os.path.join(args.out_dir, 'log')
    args.ensemble_dir = os.path.join(args.out_dir, 'ensemble-{}'.format(args.class_weight_scheme))
    if args.class_weight_scheme == 'customize':
        args.model_dir = os.path.join(args.model_dir, 'weight{}'.format(args.additional_weight))
        args.ensemble_dir = os.path.join(args.ensemble_dir, 'weight{}'.format(args.additional_weight))
    prepare_folders(args)
    logger = set_logging(args)
    logger.info("Here is the arguments of this running:")
    logger.info("{}".format(args))
    utils.check_args_conflict(args)

    # Set the files that contain the training and test data. Using "trecis2019-A" means we want to tune parameters.
    args.data_prefix = "trecis2019-B"
    # Note that for 2019-B submission, all '2019' means '2019-B' and '2018' means '2018 + 2019-A'
    label_file = os.path.join(args.data_dir, 'ITR-H.types.v{}.json'.format(
        4 if args.data_prefix == "trecis2019-B" else 3))
    tweet_file_list = [os.path.join(args.data_dir, 'all-tweets.txt')]
    tweet_file_list_2019 = [os.path.join(args.data_dir, 'all-tweets-2019.txt')]
    train_file_list = [os.path.join(args.data_dir, 'TRECIS-CTIT-H-Training.json')]
    train_file_list += [os.path.join(args.data_dir, 'TRECIS-2018-TestEvents-Labels',
                                     'assr{}.test'.format(i)) for i in range(1, 7)]
    if args.data_prefix == "trecis2019-B":
        train_file_list += [os.path.join(args.data_dir, '2019ALabels', '2019A-assr{}.json'.format(i)) for i in range(1, 6)]
        train_file_list += [os.path.join(args.data_dir, '2019ALabels', '2019-assr2.json')]
    test_raw_tweets_json_folder = 'download_tweets'
    # Some output files that have been formalized for further use.
    formal_train_file = os.path.join(args.data_dir, 'train.txt{}'.format('_small' if args.sanity_check else ''))
    formal_test_file = os.path.join(args.data_dir, 'test.txt{}')
    tweet_text_out_file = os.path.join(args.out_dir, 'tweets-clean-text.txt')
    tweet_id_out_file = os.path.join(args.out_dir, 'tweets-id.txt')
    tweet_text_out_file_2019 = os.path.join(args.out_dir, 'tweets-clean-text-2019.txt')
    tweet_id_out_file_2019 = os.path.join(args.out_dir, 'tweets-id-2019.txt')
    predict_priority_score_out_file = os.path.join(args.out_dir, 'predict_priority_score.txt')

    # Set files for submission.
    args.model_name = '{0}{1}'.format(args.model, '-event' if args.event_wise else '')
    args.dev_label_file = os.path.join(args.ensemble_dir, 'dev_label.txt')
    args.dev_predict_file = os.path.join(args.ensemble_dir, 'dev_predict_{}.txt'.format(args.model_name))
    args.test_predict_file = os.path.join(args.ensemble_dir, 'test_predict_{}.txt'.format(args.model_name))
    args.submission_folder = utils.prepare_submission_folder(args)
    args.submission_file = os.path.join(args.submission_folder, 'submission_{}'.format(args.model_name))

    # As the original files provided by TREC are quite messy, we formalize them into train and test files.
    utils.formalize_files(train_file_list, formal_train_file, args)
    utils.formalize_test_file(test_raw_tweets_json_folder, formal_test_file, prefix=args.data_prefix)
    logger.info("The training data file is {0} and testing data file is {1}".format(
        formal_train_file, formal_test_file))

    # Step0. Extract some info which can be used later (also useful for generating submission files).
    label2id, majority_label, short2long_label = utils.get_label2id(label_file, formal_train_file, args.cv_num)
    id2label = utils.get_id2label(label2id)
    class_weight = utils.get_class_weight(args, label2id, id2label, formal_train_file)

    # When get submission, there is no need to run all following steps, but only read the `test_predict_file` and
    # pick some classes as final output according to policy (such as top-2 or auto-threshold).
    # You MUST run `--predict_mode` in advance to get the `test_predict_file` prepared.
    if args.get_submission:
        postpro = PostProcess(args, label2id, id2label, class_weight, majority_label, short2long_label,
                              formal_train_file, formal_test_file, test_raw_tweets_json_folder,
                              predict_priority_score_out_file)
        postpro.pick_labels_and_write_final_result()
        quit()

    # Step1. Preprocess and extract features for all tweets
    tweetid_list, tweet_content_list = utils.get_tweetid_content(tweet_file_list)
    utils.write_tweet_and_ids(tweetid_list, tweet_content_list, tweet_text_out_file, tweet_id_out_file)
    tweetid_list_2019, tweet_content_list_2019 = utils.get_tweetid_content(tweet_file_list_2019)
    utils.write_tweet_and_ids(tweetid_list_2019, tweet_content_list_2019, tweet_text_out_file_2019,
                              tweet_id_out_file_2019)
    # Note that before `extract_features()`, we should manually run the `extract_features.sh` in `feature_tools`.
    # quit()  # The `extract_features.sh` only need to be run once for the same dataset.
    preprocess = Preprocess(args, tweetid_list, tweet_content_list, label2id, tweet_id_out_file)
    preprocess.extract_features()
    preprocess_2019 = Preprocess(args, tweetid_list_2019, tweet_content_list_2019, label2id,
                                 tweet_id_out_file_2019, test=True)
    preprocess_2019.extract_features()

    if args.train_regression:
        data_x, data_score = preprocess.extract_train_data(formal_train_file, get_score=True)
        train_regression = TrainRegression(args, data_x, data_score)
        if args.cross_validate:
            train_regression.cross_validate()
            quit()

    if args.cross_validate:
        # Step2. Train and Cross-validation (for tuning hyper-parameters).
        # If we want to do ensemble in the future, we need the prediction on dev data by setting `--cross_validate`.
        if args.event_wise:
            data_x, data_y, event2idx_list, line_num = preprocess.extract_train_data(formal_train_file)
            data_predict_collect = np.zeros([line_num, len(label2id)])
            metrics_collect = []
            metric_names = None
            for event_type in utils.idx2event_type:
                it_data_x, it_data_y = data_x[event_type], data_y[event_type]
                train = Train(args, it_data_x, it_data_y, id2label, preprocess.feature_len, class_weight, event_type)
                metrics, predict_score = train.train()
                for i, idx in enumerate(event2idx_list[event_type]):
                    data_predict_collect[idx] = predict_score[i]
                metrics_collect.append((metrics, it_data_x.shape[0]))
                if metric_names is None:
                    metric_names = train.metric_names
            utils.get_final_metrics(metrics_collect, metric_names)
        else:
            data_x, data_y = preprocess.extract_train_data(formal_train_file)
            train = Train(args, data_x, data_y, id2label, preprocess.feature_len, class_weight)
            _, data_predict_collect = train.train()
        if args.predict_mode:
            utils.write_predict_and_label(args, formal_train_file, label2id, data_predict_collect)

    if args.predict_mode:
        # Step3. Get the 2019 test data, and retrain the model on all training data, then predict on the 2019-test
        if args.event_wise:
            data_x, data_y, _, _ = preprocess.extract_train_data(formal_train_file)
            test_x, event2idx_list, line_num = preprocess_2019.extract_formalized_test_data(formal_test_file)
            test_predict_collect = np.zeros([line_num, len(label2id)])
            for event_type in utils.idx2event_type:
                it_data_x, it_data_y, it_test_x = data_x[event_type], data_y[event_type], test_x[event_type]
                if len(it_test_x) == 0:
                    print("[WARNING] There are no event belongs to {} for the test data".format(event_type))
                    continue
                train = Train(args, it_data_x, it_data_y, id2label,
                              preprocess_2019.feature_len, class_weight, event_type)
                train.train_on_all()
                predict_score = train.predict_on_test(it_test_x)
                for i, idx in enumerate(event2idx_list[event_type]):
                    test_predict_collect[idx] = predict_score[i]
        else:
            data_x, data_y = preprocess.extract_train_data(formal_train_file)
            test_x = preprocess_2019.extract_formalized_test_data(formal_test_file)
            train = Train(args, data_x, data_y, id2label, preprocess_2019.feature_len, class_weight)
            train.train_on_all()
            test_predict_collect = train.predict_on_test(test_x)
        utils.write_predict_res_to_file(args, test_predict_collect)

        if args.train_regression:
            test_x = preprocess_2019.extract_formalized_test_data(formal_test_file)
            if args.event_wise:
                # For the event_wise setting, many additional things are extracted; we only need test_x here.
                test_x = test_x[0]
            train_regression.train()
            predict_priority_score = train_regression.predict_on_test(test_x)
            utils.write_predict_score_to_file(predict_priority_score, predict_priority_score_out_file)

    if args.ensemble is not None:
        # TODO(junpeiz): Average the priority score for ensemble.
        # Step4 (optional). Do the ensemble of different model
        if args.event_wise:
            raise NotImplementedError("We don't want to ensemble for event-wise models")
        else:
            out_file = os.path.join(args.out_dir, 'ensemble_out.txt')
            # Note the file list contains predictions from all models with and without the '-event' suffix.
            # So, we need to train both event-wise and not event-wise models or just delete those files in the folder.
            dev_predict_file_list = utils.get_predict_file_list(args.ensemble_dir, 'dev_predict_')
            test_predict_file_list = utils.get_predict_file_list(args.ensemble_dir, 'test_predict_')
            train_x = utils.get_ensemble_feature(dev_predict_file_list)
            train_y = utils.get_ensemble_label(args.dev_label_file)
            print("The shape of ensemble train_x is {0}".format(train_x.shape))
            utils.ensemble_cross_validate(train_x, train_y, id2label, train.mlb, args.ensemble)
            test_x = utils.get_ensemble_feature(test_predict_file_list)
            predict = utils.ensemble_train_and_predict(train_x, train.mlb.transform(train_y), test_x,
                                                       id2label, args.ensemble)
            predict = [id2label[x] for x in predict]
            with open(out_file, 'w', encoding='utf8') as f:
                for it_predict in predict:
                    f.write("{}\n".format(it_predict))
            print("The ensemble result has been written to {}".format(out_file))
Example #19
    def __init__(self, generator, height=48, width=48, batch_size=1):
        super(EvalPuppet, self).__init__()
        self.batch_size = batch_size
        self.generator = generator
        self.postprocess = PostProcess(height, width)
        self.evaluate = Eval()