def read_pfd(self):
    pfds_list = self.pfds_list
    labels = self.labels
    count = self.datasize
    profiles = np.empty((count, 1, 64))
    DMcs = np.empty((count, 1, 64))
    TvPs = np.empty((count, 64, 64))
    FvPs = np.empty((count, 64, 64))
    n = 0
    for pfd_file in pfds_list:
        apfd = pfdreader(pfd_file)
        TvP = apfd.getdata(intervals=64).reshape(64, 64)
        FvP = apfd.getdata(subbands=64).reshape(64, 64)
        profile = apfd.getdata(phasebins=64)
        DMc = apfd.getdata(DMbins=64)
        TvPs[n] = TvP
        FvPs[n] = FvP
        profiles[n] = profile
        DMcs[n] = DMc
        n += 1
    pulsar_flag = np.array(labels)
    print(pulsar_flag)
    self.save_pickle(TvPs, self.save_path + 'TvPs_2.pkl')
    self.save_pickle(FvPs, self.save_path + 'FvPs_2.pkl')
    self.save_pickle(pulsar_flag, self.save_path + 'PulsarFlag_2.pkl')
    self.save_pickle(profiles, self.save_path + 'profiles_2.pkl')
    self.save_pickle(DMcs, self.save_path + 'DMcs_2.pkl')
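# Note: save_pickle is a method of the surrounding class and is not shown
# above. A minimal sketch of what it presumably does, assuming it simply
# pickles the array to the given path (the serializer is an assumption):
def save_pickle(self, data, path):
    # Hypothetical implementation: serialize the array to disk.
    with open(path, 'wb') as f:
        cPickle.dump(data, f, protocol=2)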
def extract_and_score(opts):
    path = opts.in_path
    file_type = opts.file_type
    # Load models
    classifiers = []
    for model in models:
        with open(os.path.join(AI_PATH, model), "rb") as f:
            classifiers.append(cPickle.load(f))
        log.info("Loaded model {}".format(model))
    # Find all candidate files
    arfiles = sorted(glob.glob("{}/*.{}".format(path, file_type)),
                     key=get_id_from_cand_file)
    log.info("Retrieved {} archive files from {}".format(len(arfiles), path))
    scores = []
    readers = [pfdreader(f) for f in arfiles]
    for classifier in classifiers:
        scores.append(classifier.report_score(readers))
    log.info("Scored with all models")
    # Skip sorting; keep candidates in file-ID order
    #combined = sorted(zip(arfiles, *scores), reverse=True, key=lambda x: x[1])
    combined = zip(arfiles, *scores)
    names = ",".join(["{}".format(model.split("/")[-1]) for model in models])
    with open("{}/pics_scores.txt".format(path), "w") as fout:
        fout.write("arfile,{}\n".format(names))
        for row in combined:
            scores = ",".join(map(str, row[1:]))
            fout.write("{},{}\n".format(row[0], scores))
    log.info("Written to file in {}".format(path))
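# The sort key get_id_from_cand_file is referenced above but not defined in
# this snippet. A minimal sketch of what such a helper might look like,
# assuming candidate filenames carry a numeric ID (the filename pattern is
# an assumption, not the source's):
import os
import re

def get_id_from_cand_file(path):
    # Hypothetical helper: pull the first integer out of the filename
    # so candidates sort numerically rather than lexicographically.
    match = re.search(r"(\d+)", os.path.basename(path))
    return int(match.group(1)) if match else -1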
def _compute_rating(self, cand):
    """Return a rating for the candidate. The rating value is the
        PICS classifier score predicted from the candidate's spd file.

        Input:
            cand: A Candidate object to rate.

        Output:
            value: The rating value.
    """
    pred = classifier.report_score([pfdreader(cand.spdfn)])
    return pred
def _compute_rating(self, cand):
    """Return a rating for the candidate. The rating value is the
        PICS classifier score predicted from the candidate's pfd file.

        Input:
            cand: A Candidate object to rate.

        Output:
            value: The rating value.
    """
    pfd_fn = cand.get_from_cache('pfd').pfd_filename
    pred = classifier.report_score([pfdreader(pfd_fn)])
    return pred
def confirmCandidate(pfdFile):
    AI_PATH = '/'.join(ubc_AI.__file__.split('/')[:-1])
    # Alternative trained models:
    #classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_HTRU.pkl', 'rb'))
    #classifier = cPickle.load(open('../clfl2_PALFA_1.pkl', 'rb'))
    #classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_BD.pkl', 'rb'))
    #classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_ResNet.pkl', 'rb'))
    classifier = cPickle.load(
        open(AI_PATH + '/trained_AI/clfl2_PALFA.pkl', 'rb'))
    tmp = [pfdFile]
    AI_scores = classifier.report_score([pfdreader(f) for f in tmp])
    text = '\n'.join(
        ['%s %s' % (tmp[i], AI_scores[i]) for i in range(len(tmp))])
    return text
def classify_by_path(self):
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state('tmp/checkpoints/')
        new_saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path + '.meta')
        new_saver.restore(sess, ckpt.model_checkpoint_path)
        graph = tf.get_default_graph()
        fvp = graph.get_tensor_by_name('input1:0')
        tvp = graph.get_tensor_by_name('input2:0')
        dm = graph.get_tensor_by_name('input3:0')
        pro = graph.get_tensor_by_name('input4:0')
        rate = graph.get_tensor_by_name('rate:0')
        training = graph.get_tensor_by_name('is_training:0')
        y = graph.get_tensor_by_name('output:0')
        t_1 = time.time()
        for file in glob.glob(self.path + '*.pfd'):
            print file
            apfd = pfdreader(file)
            TvP = apfd.getdata(intervals=64).reshape(64, 64)
            data_TvP = np.empty([1, 64, 64, 1])
            data_TvP[0, :, :, 0] = np.array(TvP)
            FvP = apfd.getdata(subbands=64).reshape(64, 64)
            data_FvP = np.empty([1, 64, 64, 1])
            data_FvP[0, :, :, 0] = np.array(FvP)
            profile = apfd.getdata(phasebins=64)
            data_profile = np.empty([1, 64, 1])
            data_profile[0, :, 0] = np.transpose(np.array(profile))
            dmb = apfd.getdata(DMbins=64)
            data_dmb = np.empty([1, 64, 1])
            data_dmb[0, :, 0] = np.transpose(np.array(dmb))
            result = sess.run(y, feed_dict={fvp: data_FvP, tvp: data_TvP,
                                            dm: data_dmb, pro: data_profile,
                                            rate: 0, training: False})
            # Probability of the positive (pulsar) class
            proba = np.float32(result[0][1])
            str_info = file + '.png ' + str(proba) + '\n'
            with open('tmp/fast_zhu_result.txt', 'a') as f:
                f.write(str_info)
        t_2 = time.time() - t_1
        print('Classifying complete in {:.0f} m {:.0f} s'.format(t_2 // 60, t_2 % 60))
def classify_gbncc(self):
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        ckpt = tf.train.get_checkpoint_state('tmp/checkpoints/')
        new_saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path + '.meta')
        new_saver.restore(sess, ckpt.model_checkpoint_path)
        graph = tf.get_default_graph()
        fvp = graph.get_tensor_by_name('input1:0')
        tvp = graph.get_tensor_by_name('input2:0')
        dm = graph.get_tensor_by_name('input3:0')
        profile = graph.get_tensor_by_name('input4:0')
        rate = graph.get_tensor_by_name('rate:0')
        training = graph.get_tensor_by_name('is_training:0')
        y = graph.get_tensor_by_name('output:0')
        self.predict = []
        pfd_path = '/data/whf/AI/training/GBNCC_ARCC_rated/GBNCC_beams/'
        for pfd in self.pfdfiles:
            apfd = pfdreader(pfd_path + pfd)
            res = apfd.getdata(intervals=64, subbands=64, phasebins=64, DMbins=64)
            data_TvP = np.empty([1, 64, 64, 1])
            data_TvP[0, :, :, 0] = res[0:4096].reshape((64, 64))
            data_Fvp = np.empty([1, 64, 64, 1])
            data_Fvp[0, :, :, 0] = res[4096:8192].reshape((64, 64))
            data_profile = np.empty([1, 64, 1])
            data_profile[0, :, 0] = res[8192:8256]
            data_dmb = np.empty([1, 64, 1])
            data_dmb[0, :, 0] = res[8256:8320]
            result = sess.run(y, feed_dict={fvp: data_Fvp, tvp: data_TvP,
                                            dm: data_dmb, profile: data_profile,
                                            rate: 0, training: False})
            # Rescale the raw network output to span [0, 1]
            prob = (result[0][1] - 0.26) / (0.74 - 0.26)
            self.predict.append(prob)

    def flatten(listOfLists):
        return chain.from_iterable(listOfLists)

    cand_scores = np.array(list(flatten([self.predict])))
    psr_scores = cand_scores[self.pulsars]
    harmonic_scores = cand_scores[self.harmonics]
    result = {'cand_scores': cand_scores, 'psr_scores': psr_scores,
              'harmonic_scores': harmonic_scores, 'cands': self.cands,
              'pulsars': self.pulsars, 'harmonics': self.harmonics,
              'pfdfiles': self.pfdfiles}
    cPickle.dump(result, open('tmp/scores_ensemble_GBNCC.pkl', 'wb'), protocol=2)
    print 'done, result saved to ./scores_ensemble_GBNCC.pkl'
    fo = open('tmp/gbncc_prob.txt', 'w')
    for i, f in enumerate(self.pfdfiles):
        fo.write('%s %s %s\n' % (f, self.cands[i], self.predict[i]))
    fo.close()
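# For reference: when pfdreader.getdata is called with all four keyword
# arguments at once, it returns a single flat vector with the four features
# concatenated, which is why the code above slices at fixed offsets
# (64*64 + 64*64 + 64 + 64 = 8320 values in total). A small hypothetical
# helper that makes the layout explicit, with offsets taken from the slices
# used above:
def split_pics_features(res):
    # Hypothetical helper: unpack the 8320-element feature vector into
    # the four arrays the network expects.
    assert len(res) == 64 * 64 + 64 * 64 + 64 + 64
    tvp = res[0:4096].reshape(64, 64)      # time vs phase
    fvp = res[4096:8192].reshape(64, 64)   # frequency vs phase
    prof = res[8192:8256]                  # summed pulse profile
    dm = res[8256:8320]                    # DM curve
    return tvp, fvp, prof, dm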
def select():
    AI_PATH = '/'.join(ubc_AI.__file__.split('/')[:-1])
    #pfd_path = '/data/public/AI_data/pzc_pfds/'
    pfd_path = './'
    # Alternative trained models:
    #classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_HTRU.pkl', 'rb'))
    #classifier = cPickle.load(open('../clfl2_PALFA_1.pkl', 'rb'))
    #classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_BD.pkl', 'rb'))
    #classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_ResNet.pkl', 'rb'))
    classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_PALFA.pkl', 'rb'))
    pfdfile = glob.glob(pfd_path + '*.pfd')
    print pfdfile
    AI_scores = classifier.report_score([pfdreader(f) for f in pfdfile])
    text = '\n'.join(['%s %s' % (pfdfile[i], AI_scores[i])
                      for i in range(len(pfdfile))])
    print text, type(text)
    fout = open('testResult.txt', 'w')
    fout.write(text)
    fout.close()
def extract_and_score(path):
    # Load models
    classifiers = LOADED_MODELS
    # Find all files
    arfiles = glob.glob("{}/*.ar".format(path))
    log.info("Retrieved {} archive files from {}".format(len(arfiles), path))
    if len(arfiles) == 0:
        return
    scores = []
    readers = [pfdreader(f) for f in arfiles]
    for classifier in classifiers:
        scores.append(classifier.report_score(readers))
    log.info("Scored with all models")
    combined = sorted(zip(arfiles, *scores), reverse=True, key=lambda x: x[1])
    log.info("Sorted scores...")
    # Header is comma-separated to match the comma-separated rows below
    names = ",".join(["{}".format(model.split("/")[-1]) for model in MODELS])
    with open("{}/pics_scores.txt".format(path), "w") as fout:
        fout.write("#arfile,{}\n".format(names))
        for row in combined:
            scores = ",".join(map(str, row[1:]))
            fout.write("{},{}\n".format(row[0], scores))
    log.info("Written to file in {}".format(path))
import sys
import os
import time
import numpy as np
from subprocess import call

sys.path.append('/home/psr')
import cPickle, glob, ubc_AI
from ubc_AI.data import pfdreader

AI_PATH = '/'.join(ubc_AI.__file__.split('/')[:-1])
classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_PALFA.pkl', 'rb'))
pfdfile = glob.glob('ubc_AI/pfd_files/*.pfd') + glob.glob(
    'ubc_AI/pfd_files/*.ar') + glob.glob('ubc_AI/pfd_files/*.ar2') + glob.glob(
        'ubc_AI/pfd_files/*.spd')

timeout = time.time() + 60 * 15  # 15 minutes from now
data = ''
i = 0
while True:
    if time.time() > timeout:
        break
    line = sys.stdin.readline()
    if line.strip() != 'eof':
        data += line
        continue
    else:
        # A full file has been streamed; write it out and score it
        with open('test.pfd', 'wb') as f:
            f.write(data)
        data = ''
        AI_scores = classifier.report_score(pfdreader('test.pfd'))
        print os.path.basename(os.path.normpath(pfdfile[i])), "%.6f" % AI_scores
        sys.stdout.flush()
        time.sleep(1)
        i += 1
import sys
import os
import time

sys.path.append('/home/psr')
import cPickle, glob, ubc_AI
from ubc_AI.data import pfdreader
from ubc_AI.prepfold import pfd

AI_PATH = '/'.join(ubc_AI.__file__.split('/')[:-1])
# Model name is taken from the command line, e.g. clfl2_PALFA.pkl
#classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_PALFA.pkl', 'rb'))
classifier = cPickle.load(open(AI_PATH + '/trained_AI/' + sys.argv[1], 'rb'))
pfdfile = glob.glob('ubc_AI/pfd_files/*.pfd') + glob.glob(
    'ubc_AI/pfd_files/*.ar') + glob.glob('ubc_AI/pfd_files/*.ar2') + glob.glob(
        'ubc_AI/pfd_files/*.spd')

timeout = time.time() + 60 * 15  # 15 minutes from now
data = ''
i = 0
while True:
    if time.time() > timeout:
        break
    line = sys.stdin.readline()
    if line.strip() != 'eof':
        data += line
        continue
    else:
        # Buffer in shared memory to avoid disk I/O
        with open('/dev/shm/test.pfd', 'wb') as f:
            f.write(data)
        data = ''
        AI_scores = classifier.report_score(pfdreader('/dev/shm/test.pfd'))
        print os.path.basename(os.path.normpath(pfdfile[i])), "%.6f" % AI_scores
        sys.stdout.flush()
        time.sleep(1)
        i += 1
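# Both stdin-driven scripts above expect each candidate file to be streamed
# to stdin, terminated by a line containing only 'eof'. A minimal sender
# sketch under that assumption; the script name and file list are
# hypothetical, and streaming binary pfd data through line-based reads is
# taken on faith from the scripts above:
import subprocess

scorer = subprocess.Popen(['python', 'score_stream.py', 'clfl2_PALFA.pkl'],
                          stdin=subprocess.PIPE)
for fn in ['cand1.pfd', 'cand2.pfd']:
    with open(fn, 'rb') as f:
        scorer.stdin.write(f.read())
    # Sentinel line tells the scorer the file is complete
    scorer.stdin.write(b'\neof\n')
    scorer.stdin.flush()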
def extract_and_score(opts, info):
    # Load model
    classifier = cPickle.load(
        open(AI_PATH + '/trained_AI/%s' % info["model"], 'rb'))
    log.info("Loaded model %s" % info["model"])

    # Find all files
    pfdfile = glob.glob('%s/*.pfd' % (info["input_path"]))
    log.info("Retrieved pfd files from %s" % (info["input_path"]))

    # Set up database connection
    log.debug("Connecting to TrapumDB...")
    try:
        trapum = trapum_db_score.TrapumDataBase("db_host_ip", "db_port",
                                                "db_name", "user", "passwd")
        t = trapum.connect()
    except:
        log.error("DB connection failed")
        pass
    log.debug("Connected to TrapumDB")

    # Get pipeline ID
    pipeline_id = trapum.get_pipeline_id_from_name("PICS_Original")

    # Create processing
    #submit_time = str(datetime.now())
    #processing_id = trapum.create_processing(pipeline_id, submit_time, "queued")

    # Get processing
    processing_id = info["processing_id"]

    # Create processing pivot entries
    dp_id = info["dp_id"]
    pp_id = trapum.create_pivot(dp_id, processing_id)

    # Start time update
    start_time = str(datetime.now())
    trapum.update_start_time(start_time, processing_id)

    AI_scores = classifier.report_score([pfdreader(f) for f in pfdfile])
    log.info("Scored with model %s" % info["model"])

    # Sort based on highest score
    pfdfile_sorted = [
        x for _, x in sorted(zip(AI_scores, pfdfile), reverse=True)
    ]
    AI_scores_sorted = sorted(AI_scores, reverse=True)
    log.info("Sorted scores..")

    text = '\n'.join([
        '%s %s' % (pfdfile_sorted[i], AI_scores_sorted[i])
        for i in range(len(pfdfile))
    ])
    fout = open('%s/pics_original_descending_scores.txt' % info["input_path"], 'w')
    fout.write(text)
    log.info("Written to file in %s" % info["input_path"])
    fout.close()

    # End time update
    end_time = str(datetime.now())
    trapum.update_end_time(end_time, processing_id)
    trapum.update_processing_status("Successful", processing_id)

    # Tar all files in this directory
    tar_name = os.path.basename(info["input_path"]) + "_presto_cands.tar"
    make_tarfile(info["input_path"], info["input_path"], tar_name)

    # Get pointing and beam id from one of the dp_ids
    try:
        pb_list = trapum.get_pb_from_dp(dp_id)
    except Exception as error:
        log.error(error)

    # Update dataproducts with PICS output entry and tarred files
    dp_id = trapum.create_secondary_dataproduct(
        pb_list[0], pb_list[1], processing_id, 1,
        "pics_original_descending_scores.txt", str(info["input_path"]), 12)
    dp_id = trapum.create_secondary_dataproduct(
        pb_list[0], pb_list[1], processing_id, 1,
        str(tar_name), str(info["input_path"]), 11)

    # Remove original files
    subprocess.check_call("rm *pfd* *.txt", shell=True,
                          cwd=str(info["input_path"]))
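# make_tarfile is referenced above but not defined in this snippet. A minimal
# sketch under the assumption that its signature is
# (output_path, source_dir, tar_name), matching the call above; this is a
# guess at the helper, not the pipeline's actual implementation:
import os
import tarfile

def make_tarfile(output_path, source_dir, tar_name):
    # Hypothetical helper: bundle the candidate directory into a tar archive.
    with tarfile.open(os.path.join(output_path, tar_name), "w") as tar:
        tar.add(source_dir, arcname=os.path.basename(source_dir))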
def classify_by_path(self):
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state('tmp/checkpoints/')
        new_saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path + '.meta')
        new_saver.restore(sess, ckpt.model_checkpoint_path)
        graph = tf.get_default_graph()
        fvp = graph.get_tensor_by_name('input1:0')
        tvp = graph.get_tensor_by_name('input2:0')
        dm = graph.get_tensor_by_name('input3:0')
        pro = graph.get_tensor_by_name('input4:0')
        rate = graph.get_tensor_by_name('rate:0')
        training = graph.get_tensor_by_name('is_training:0')
        y = graph.get_tensor_by_name('output:0')
        t_1 = time.time()
        # Skip pfds that already have a score in the result file
        with open(self.txt_file, 'r') as f:
            processed_pfds = np.genfromtxt(self.txt_file,
                                           dtype=[('fn', '|S200'), ('s', 'f')])
        processed_pfds_set = set(processed_pfds['fn'])
        print 'we already classified', len(processed_pfds_set), 'files'
        with open(self.txt_file, 'a') as f:
            allpfds = glob.glob(self.path + '*.pfd')
            pb = PB(maxValue=len(allpfds))
            allpfds_set = set(allpfds)
            to_process_set = allpfds_set - processed_pfds_set
            print len(to_process_set), 'files to be classified.'
            for i, pfd in enumerate(sorted(list(to_process_set))):
                apfd = pfdreader(pfd)
                # One getdata call returns all four features concatenated
                res = apfd.getdata(intervals=64, subbands=64,
                                   phasebins=64, DMbins=64)
                data_TvP = np.empty([1, 64, 64, 1])
                data_TvP[0, :, :, 0] = res[0:4096].reshape((64, 64))
                data_FvP = np.empty([1, 64, 64, 1])
                data_FvP[0, :, :, 0] = res[4096:8192].reshape((64, 64))
                data_profile = np.empty([1, 64, 1])
                data_profile[0, :, 0] = res[8192:8256]
                data_dmb = np.empty([1, 64, 1])
                data_dmb[0, :, 0] = res[8256:8320]
                result = sess.run(y, feed_dict={fvp: data_FvP,
                                                tvp: data_TvP,
                                                dm: data_dmb,
                                                pro: data_profile,
                                                rate: 0,
                                                training: False})
                proba = np.float32(result[0][1])
                f.write(pfd + ' ' + str(proba) + '\n')
                if i % 10 == 0:
                    pb(i)
        t_2 = time.time() - t_1
        print('Classifying complete in {:.0f} m {:.0f} s'.format(
            t_2 // 60, t_2 % 60))
import cPickle, glob, ubc_AI
from ubc_AI.data import pfdreader

AI_PATH = '/'.join(ubc_AI.__file__.split('/')[:-1])
classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_PALFA.pkl', 'rb'))
pfdfile = glob.glob('*.pfd') + glob.glob('*.ar') + glob.glob('*.ar2') + glob.glob('*.spd')
AI_scores = classifier.report_score([pfdreader(f) for f in pfdfile])
text = '\n'.join(['%s %s' % (pfdfile[i], AI_scores[i])
                  for i in range(len(pfdfile))])
fout = open('clfresult.txt', 'w')
fout.write(text)
fout.close()
import cPickle, glob, ubc_AI
from ubc_AI.data import pfdreader

AI_PATH = '/'.join(ubc_AI.__file__.split('/')[:-1])
classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_BD.pkl', 'rb'))
pfdfile = glob.glob('*.pfd')
print pfdfile
AI_scores = classifier.report_score([pfdreader(f) for f in pfdfile])
text = '\n'.join(
    ['%s %s' % (pfdfile[i], AI_scores[i]) for i in range(len(pfdfile))])
fout = open('clfresult.txt', 'w')
fout.write(text)
fout.close()
def write_record(save_floder, read_path):
    if not os.path.exists(save_floder):
        os.makedirs(save_floder)
    if not os.path.exists(read_path):
        print("File path is wrong; please set a valid reading path")
    # Collect pfd files; the directory name is the class label
    pfd_list = []
    labels = []
    for _, dirs, _ in os.walk(read_path):
        for dir in dirs:
            for _, _, files in os.walk(os.path.join(read_path, dir)):
                for file in files:
                    file_path = os.path.join(read_path, dir, file)
                    pfd_list.append(file_path)
                    labels.append(dir)
    # Shuffle before the 80/20 train/validation split
    shuffle_pfdlist = []
    shuffle_labels = []
    idx = np.random.permutation(len(labels))
    for i in idx:
        shuffle_pfdlist.append(pfd_list[i])
        shuffle_labels.append(labels[i])
    train_file_num = np.int(0.8 * len(shuffle_labels))
    n = 0
    train_record_path = save_floder + "train_pfd.tfrecords"
    valid_record_path = save_floder + "valid_pfd.tfrecords"
    train_pfd_writer = tf.python_io.TFRecordWriter(train_record_path)
    vaild_pfd_writer = tf.python_io.TFRecordWriter(valid_record_path)
    for pfd_file in shuffle_pfdlist:
        apfd = pfdreader(pfd_file)
        TvP = apfd.getdata(intervals=64).reshape(64, 64)
        data_TvP = np.empty([64, 64, 1])
        data_TvP[:, :, 0] = np.array(TvP)
        tvp_raw = data_TvP.tostring()
        FvP = apfd.getdata(subbands=64).reshape(64, 64)
        data_FvP = np.empty([64, 64, 1])
        data_FvP[:, :, 0] = np.array(FvP)
        fvp_raw = data_FvP.tostring()
        profile = apfd.getdata(phasebins=64)
        data_profile = np.empty([64, 1])
        data_profile[:, 0] = np.transpose(np.array(profile))
        prof_raw = data_profile.tostring()
        dmb = apfd.getdata(DMbins=64)
        data_dmb = np.empty([64, 1])
        data_dmb[:, 0] = np.transpose(np.array(dmb))
        dm_raw = data_dmb.tostring()
        raw_label = np.int64(shuffle_labels[n])
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'fvp': tf.train.Feature(bytes_list=tf.train.BytesList(value=[fvp_raw])),
                'tvp': tf.train.Feature(bytes_list=tf.train.BytesList(value=[tvp_raw])),
                'prof': tf.train.Feature(bytes_list=tf.train.BytesList(value=[prof_raw])),
                'dm': tf.train.Feature(bytes_list=tf.train.BytesList(value=[dm_raw])),
                'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[raw_label]))
            }))
        if n < train_file_num:
            train_pfd_writer.write(example.SerializeToString())
        else:
            vaild_pfd_writer.write(example.SerializeToString())
        n += 1
    train_pfd_writer.close()
    vaild_pfd_writer.close()
    print("Finished writing pfd data to TFRecord files!")
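# A minimal sketch of the matching reader for the records written above,
# assuming the TF1 tf.parse_single_example API; the function name is
# hypothetical. np.empty defaults to float64, so the raw bytes are decoded
# as tf.float64.
def parse_pfd_example(serialized):
    # Hypothetical parser mirroring the feature keys written by write_record.
    features = tf.parse_single_example(serialized, features={
        'fvp': tf.FixedLenFeature([], tf.string),
        'tvp': tf.FixedLenFeature([], tf.string),
        'prof': tf.FixedLenFeature([], tf.string),
        'dm': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    })
    fvp = tf.reshape(tf.decode_raw(features['fvp'], tf.float64), [64, 64, 1])
    tvp = tf.reshape(tf.decode_raw(features['tvp'], tf.float64), [64, 64, 1])
    prof = tf.reshape(tf.decode_raw(features['prof'], tf.float64), [64, 1])
    dm = tf.reshape(tf.decode_raw(features['dm'], tf.float64), [64, 1])
    return fvp, tvp, prof, dm, features['label']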