def get_pred_ref(input_file):
    """Load model predictions and their matching reference comments.

    Parses *input_file* as tab-separated ``fid<TAB>tokens`` lines, then
    reads ``'%s/coms.test' % dataprep`` (comma-separated ``fid,comment``
    lines) and keeps only the function ids present in both files.

    Relies on module-level helpers ``prep``/``drop`` (progress output),
    ``fil`` (token filtering), and the global ``dataprep`` data directory
    — assumed to be defined elsewhere in this module.

    Returns:
        tuple[dict, dict]: ``(newpreds, refs)`` keyed by int fid.
        ``newpreds[fid]`` is the filtered predicted token list;
        ``refs[fid]`` is a one-element list containing the filtered
        reference token list (the list-of-references shape expected by
        BLEU-style scorers).
    """
    prep('preparing predictions list... ')
    preds = dict()
    # 'with' guarantees the file handle is closed even if a line fails to
    # parse (the original left 'targets' below unclosed entirely).
    with open(input_file, 'r') as predicts:
        for line in predicts:
            # maxsplit=1 so a stray tab inside the prediction text cannot
            # break the 2-tuple unpack.
            fid, pred = line.split('\t', 1)
            preds[int(fid)] = fil(pred.split())
    drop()

    refs = dict()
    newpreds = dict()
    with open('%s/coms.test' % (dataprep), 'r') as targets:
        for line in targets:
            # maxsplit=1: reference comments may themselves contain commas.
            fid, com = line.split(',', 1)
            fid = int(fid)
            com = fil(com.split())
            try:
                newpreds[fid] = preds[fid]
            except KeyError:
                # No prediction was produced for this fid; skip it so the
                # two returned dicts stay aligned.
                continue
            refs[fid] = [com]

    return newpreds, refs
if outfile is None: outfile = modelfile.split('/')[-1] K.set_floatx(args.dtype) os.environ['CUDA_VISIBLE_DEVICES'] = gpu os.environ['TF_CPP_MIN_LOG_LEVEL'] = args.tf_loglevel sys.path.append(dataprep) import tokenizer prep('loading tokenizers... ') tdatstok = pickle.load(open('%s/tdats.tok' % (dataprep), 'rb'), encoding='UTF-8') comstok = pickle.load(open('%s/coms.tok' % (dataprep), 'rb'), encoding='UTF-8') smltok = pickle.load(open('%s/smls.tok' % (dataprep), 'rb'), encoding='UTF-8') drop() prep('loading sequences... ') seqdata = pickle.load(open('%s/%s' % (dataprep, datfile), 'rb')) drop() print(zerodats) if zerodats == 'yes': zerodats = True else: zerodats = False print(zerodats) if zerodats: v = np.zeros(100) for key, val in seqdata['dttrain'].items():