def read_pfd(self):
    pfds_list = self.pfds_list
    labels = self.labels
    count = self.datasize
    profiles = np.empty((count, 1, 64))
    DMcs = np.empty((count, 1, 64))
    TvPs = np.empty((count, 64, 64))
    FvPs = np.empty((count, 64, 64))
    n = 0
    for pfd_file in pfds_list:
        apfd = pfdreader(pfd_file)
        TvP = apfd.getdata(intervals=64).reshape(64, 64)
        FvP = apfd.getdata(subbands=64).reshape(64, 64)
        profile = apfd.getdata(phasebins=64)
        DMc = apfd.getdata(DMbins=64)
        TvPs[n] = TvP
        FvPs[n] = FvP
        profiles[n] = profile
        DMcs[n] = DMc
        n += 1
    pulsar_flag = np.array(labels)
    print(pulsar_flag)
    self.save_pickle(TvPs, self.save_path + 'TvPs_2.pkl')
    self.save_pickle(FvPs, self.save_path + 'FvPs_2.pkl')
    self.save_pickle(pulsar_flag, self.save_path + 'PulsarFlag_2.pkl')
    self.save_pickle(profiles, self.save_path + 'profiles_2.pkl')
    self.save_pickle(DMcs, self.save_path + 'DMcs_2.pkl')
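# Note: save_pickle is a method of the surrounding class and is not shown
# above. A minimal sketch of what it presumably does, assuming it simply
# pickles the array to the given path (the serializer is an assumption):
def save_pickle(self, data, path):
    # Hypothetical implementation: serialize the array to disk.
    with open(path, 'wb') as f:
        cPickle.dump(data, f, protocol=2)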
def extract_and_score(opts):
    path = opts.in_path
    file_type = opts.file_type
    # Load models
    classifiers = []
    for model in models:
        with open(os.path.join(AI_PATH, model), "rb") as f:
            classifiers.append(cPickle.load(f))
        log.info("Loaded model {}".format(model))
    # Find all candidate files
    arfiles = sorted(glob.glob("{}/*.{}".format(path, file_type)),
                     key=get_id_from_cand_file)
    log.info("Retrieved {} archive files from {}".format(len(arfiles), path))
    scores = []
    readers = [pfdreader(f) for f in arfiles]
    for classifier in classifiers:
        scores.append(classifier.report_score(readers))
    log.info("Scored with all models")
    # Skip sorting; keep candidates in file-ID order
    #combined = sorted(zip(arfiles, *scores), reverse=True, key=lambda x: x[1])
    combined = zip(arfiles, *scores)
    names = ",".join(["{}".format(model.split("/")[-1]) for model in models])
    with open("{}/pics_scores.txt".format(path), "w") as fout:
        fout.write("arfile,{}\n".format(names))
        for row in combined:
            scores = ",".join(map(str, row[1:]))
            fout.write("{},{}\n".format(row[0], scores))
    log.info("Written to file in {}".format(path))
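# The sort key get_id_from_cand_file is referenced above but not defined in
# this snippet. A minimal sketch of what such a helper might look like,
# assuming candidate filenames carry a numeric ID (the filename pattern is
# an assumption, not the source's):
import os
import re

def get_id_from_cand_file(path):
    # Hypothetical helper: pull the first integer out of the filename
    # so candidates sort numerically rather than lexicographically.
    match = re.search(r"(\d+)", os.path.basename(path))
    return int(match.group(1)) if match else -1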
def _compute_rating(self, cand):
    """Return a rating for the candidate. The rating value is the
        PICS classifier score predicted from the candidate's spd file.

        Input:
            cand: A Candidate object to rate.

        Output:
            value: The rating value.
    """
    pred = classifier.report_score([pfdreader(cand.spdfn)])
    return pred
def _compute_rating(self, cand):
    """Return a rating for the candidate. The rating value is the
        PICS classifier score predicted from the candidate's pfd file.

        Input:
            cand: A Candidate object to rate.

        Output:
            value: The rating value.
    """
    pfd_fn = cand.get_from_cache('pfd').pfd_filename
    pred = classifier.report_score([pfdreader(pfd_fn)])
    return pred
def confirmCandidate(pfdFile):
    AI_PATH = '/'.join(ubc_AI.__file__.split('/')[:-1])
    # Alternative trained models:
    #classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_HTRU.pkl', 'rb'))
    #classifier = cPickle.load(open('../clfl2_PALFA_1.pkl', 'rb'))
    #classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_BD.pkl', 'rb'))
    #classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_ResNet.pkl', 'rb'))
    classifier = cPickle.load(
        open(AI_PATH + '/trained_AI/clfl2_PALFA.pkl', 'rb'))
    tmp = [pfdFile]
    AI_scores = classifier.report_score([pfdreader(f) for f in tmp])
    text = '\n'.join(
        ['%s %s' % (tmp[i], AI_scores[i]) for i in range(len(tmp))])
    return text
def classify_by_path(self):
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state('tmp/checkpoints/')
        new_saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path + '.meta')
        new_saver.restore(sess, ckpt.model_checkpoint_path)
        graph = tf.get_default_graph()
        fvp = graph.get_tensor_by_name('input1:0')
        tvp = graph.get_tensor_by_name('input2:0')
        dm = graph.get_tensor_by_name('input3:0')
        pro = graph.get_tensor_by_name('input4:0')
        rate = graph.get_tensor_by_name('rate:0')
        training = graph.get_tensor_by_name('is_training:0')
        y = graph.get_tensor_by_name('output:0')
        t_1 = time.time()
        for file in glob.glob(self.path + '*.pfd'):
            print file
            apfd = pfdreader(file)
            TvP = apfd.getdata(intervals=64).reshape(64, 64)
            data_TvP = np.empty([1, 64, 64, 1])
            data_TvP[0, :, :, 0] = np.array(TvP)
            FvP = apfd.getdata(subbands=64).reshape(64, 64)
            data_FvP = np.empty([1, 64, 64, 1])
            data_FvP[0, :, :, 0] = np.array(FvP)
            profile = apfd.getdata(phasebins=64)
            data_profile = np.empty([1, 64, 1])
            data_profile[0, :, 0] = np.transpose(np.array(profile))
            dmb = apfd.getdata(DMbins=64)
            data_dmb = np.empty([1, 64, 1])
            data_dmb[0, :, 0] = np.transpose(np.array(dmb))
            result = sess.run(y, feed_dict={fvp: data_FvP, tvp: data_TvP,
                                            dm: data_dmb, pro: data_profile,
                                            rate: 0, training: False})
            # Probability of the positive (pulsar) class
            proba = np.float32(result[0][1])
            str_info = file + '.png ' + str(proba) + '\n'
            with open('tmp/fast_zhu_result.txt', 'a') as f:
                f.write(str_info)
        t_2 = time.time() - t_1
        print('Classifying complete in {:.0f} m {:.0f} s'.format(t_2 // 60, t_2 % 60))
def classify_gbncc(self):
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        ckpt = tf.train.get_checkpoint_state('tmp/checkpoints/')
        new_saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path + '.meta')
        new_saver.restore(sess, ckpt.model_checkpoint_path)
        graph = tf.get_default_graph()
        fvp = graph.get_tensor_by_name('input1:0')
        tvp = graph.get_tensor_by_name('input2:0')
        dm = graph.get_tensor_by_name('input3:0')
        profile = graph.get_tensor_by_name('input4:0')
        rate = graph.get_tensor_by_name('rate:0')
        training = graph.get_tensor_by_name('is_training:0')
        y = graph.get_tensor_by_name('output:0')
        self.predict = []
        pfd_path = '/data/whf/AI/training/GBNCC_ARCC_rated/GBNCC_beams/'
        for pfd in self.pfdfiles:
            apfd = pfdreader(pfd_path + pfd)
            res = apfd.getdata(intervals=64, subbands=64, phasebins=64, DMbins=64)
            data_TvP = np.empty([1, 64, 64, 1])
            data_TvP[0, :, :, 0] = res[0:4096].reshape((64, 64))
            data_Fvp = np.empty([1, 64, 64, 1])
            data_Fvp[0, :, :, 0] = res[4096:8192].reshape((64, 64))
            data_profile = np.empty([1, 64, 1])
            data_profile[0, :, 0] = res[8192:8256]
            data_dmb = np.empty([1, 64, 1])
            data_dmb[0, :, 0] = res[8256:8320]
            result = sess.run(y, feed_dict={fvp: data_Fvp, tvp: data_TvP,
                                            dm: data_dmb, profile: data_profile,
                                            rate: 0, training: False})
            # Rescale the raw network output to span [0, 1]
            prob = (result[0][1] - 0.26) / (0.74 - 0.26)
            self.predict.append(prob)

    def flatten(listOfLists):
        return chain.from_iterable(listOfLists)

    cand_scores = np.array(list(flatten([self.predict])))
    psr_scores = cand_scores[self.pulsars]
    harmonic_scores = cand_scores[self.harmonics]
    result = {'cand_scores': cand_scores, 'psr_scores': psr_scores,
              'harmonic_scores': harmonic_scores, 'cands': self.cands,
              'pulsars': self.pulsars, 'harmonics': self.harmonics,
              'pfdfiles': self.pfdfiles}
    cPickle.dump(result, open('tmp/scores_ensemble_GBNCC.pkl', 'wb'), protocol=2)
    print 'done, result saved to ./scores_ensemble_GBNCC.pkl'
    fo = open('tmp/gbncc_prob.txt', 'w')
    for i, f in enumerate(self.pfdfiles):
        fo.write('%s %s %s\n' % (f, self.cands[i], self.predict[i]))
    fo.close()
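# For reference: when pfdreader.getdata is called with all four keyword
# arguments at once, it returns a single flat vector with the four features
# concatenated, which is why the code above slices at fixed offsets
# (64*64 + 64*64 + 64 + 64 = 8320 values in total). A small hypothetical
# helper that makes the layout explicit, with offsets taken from the slices
# used above:
def split_pics_features(res):
    # Hypothetical helper: unpack the 8320-element feature vector into
    # the four arrays the network expects.
    assert len(res) == 64 * 64 + 64 * 64 + 64 + 64
    tvp = res[0:4096].reshape(64, 64)      # time vs phase
    fvp = res[4096:8192].reshape(64, 64)   # frequency vs phase
    prof = res[8192:8256]                  # summed pulse profile
    dm = res[8256:8320]                    # DM curve
    return tvp, fvp, prof, dm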
def select():
    AI_PATH = '/'.join(ubc_AI.__file__.split('/')[:-1])
    #pfd_path = '/data/public/AI_data/pzc_pfds/'
    pfd_path = './'
    # Alternative trained models:
    #classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_HTRU.pkl', 'rb'))
    #classifier = cPickle.load(open('../clfl2_PALFA_1.pkl', 'rb'))
    #classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_BD.pkl', 'rb'))
    #classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_ResNet.pkl', 'rb'))
    classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_PALFA.pkl', 'rb'))
    pfdfile = glob.glob(pfd_path + '*.pfd')
    print pfdfile
    AI_scores = classifier.report_score([pfdreader(f) for f in pfdfile])
    text = '\n'.join(['%s %s' % (pfdfile[i], AI_scores[i])
                      for i in range(len(pfdfile))])
    print text, type(text)
    fout = open('testResult.txt', 'w')
    fout.write(text)
    fout.close()
def extract_and_score(path):
    # Load models
    classifiers = LOADED_MODELS
    # Find all files
    arfiles = glob.glob("{}/*.ar".format(path))
    log.info("Retrieved {} archive files from {}".format(len(arfiles), path))
    if len(arfiles) == 0:
        return
    scores = []
    readers = [pfdreader(f) for f in arfiles]
    for classifier in classifiers:
        scores.append(classifier.report_score(readers))
    log.info("Scored with all models")
    combined = sorted(zip(arfiles, *scores), reverse=True, key=lambda x: x[1])
    log.info("Sorted scores...")
    # Header is comma-separated to match the comma-separated rows below
    names = ",".join(["{}".format(model.split("/")[-1]) for model in MODELS])
    with open("{}/pics_scores.txt".format(path), "w") as fout:
        fout.write("#arfile,{}\n".format(names))
        for row in combined:
            scores = ",".join(map(str, row[1:]))
            fout.write("{},{}\n".format(row[0], scores))
    log.info("Written to file in {}".format(path))
import sys
import os
import time
import numpy as np
from subprocess import call

sys.path.append('/home/psr')
import cPickle, glob, ubc_AI
from ubc_AI.data import pfdreader

AI_PATH = '/'.join(ubc_AI.__file__.split('/')[:-1])
classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_PALFA.pkl', 'rb'))
pfdfile = glob.glob('ubc_AI/pfd_files/*.pfd') + glob.glob(
    'ubc_AI/pfd_files/*.ar') + glob.glob('ubc_AI/pfd_files/*.ar2') + glob.glob(
        'ubc_AI/pfd_files/*.spd')

timeout = time.time() + 60 * 15  # 15 minutes from now
data = ''
i = 0
while True:
    if time.time() > timeout:
        break
    line = sys.stdin.readline()
    if line.strip() != 'eof':
        data += line
        continue
    else:
        # A full file has been streamed; write it out and score it
        with open('test.pfd', 'wb') as f:
            f.write(data)
        data = ''
        AI_scores = classifier.report_score(pfdreader('test.pfd'))
        print os.path.basename(os.path.normpath(pfdfile[i])), "%.6f" % AI_scores
        sys.stdout.flush()
        time.sleep(1)
        i += 1
import sys
import os
import time

sys.path.append('/home/psr')
import cPickle, glob, ubc_AI
from ubc_AI.data import pfdreader
from ubc_AI.prepfold import pfd

AI_PATH = '/'.join(ubc_AI.__file__.split('/')[:-1])
# Model name is taken from the command line, e.g. clfl2_PALFA.pkl
#classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_PALFA.pkl', 'rb'))
classifier = cPickle.load(open(AI_PATH + '/trained_AI/' + sys.argv[1], 'rb'))
pfdfile = glob.glob('ubc_AI/pfd_files/*.pfd') + glob.glob(
    'ubc_AI/pfd_files/*.ar') + glob.glob('ubc_AI/pfd_files/*.ar2') + glob.glob(
        'ubc_AI/pfd_files/*.spd')

timeout = time.time() + 60 * 15  # 15 minutes from now
data = ''
i = 0
while True:
    if time.time() > timeout:
        break
    line = sys.stdin.readline()
    if line.strip() != 'eof':
        data += line
        continue
    else:
        # Buffer in shared memory to avoid disk I/O
        with open('/dev/shm/test.pfd', 'wb') as f:
            f.write(data)
        data = ''
        AI_scores = classifier.report_score(pfdreader('/dev/shm/test.pfd'))
        print os.path.basename(os.path.normpath(pfdfile[i])), "%.6f" % AI_scores
        sys.stdout.flush()
        time.sleep(1)
        i += 1
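# Both stdin-driven scripts above expect each candidate file to be streamed
# to stdin, terminated by a line containing only 'eof'. A minimal sender
# sketch under that assumption; the script name and file list are
# hypothetical, and streaming binary pfd data through line-based reads is
# taken on faith from the scripts above:
import subprocess

scorer = subprocess.Popen(['python', 'score_stream.py', 'clfl2_PALFA.pkl'],
                          stdin=subprocess.PIPE)
for fn in ['cand1.pfd', 'cand2.pfd']:
    with open(fn, 'rb') as f:
        scorer.stdin.write(f.read())
    # Sentinel line tells the scorer the file is complete
    scorer.stdin.write(b'\neof\n')
    scorer.stdin.flush()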
def extract_and_score(opts, info):
    # Load model
    classifier = cPickle.load(
        open(AI_PATH + '/trained_AI/%s' % info["model"], 'rb'))
    log.info("Loaded model %s" % info["model"])

    # Find all files
    pfdfile = glob.glob('%s/*.pfd' % (info["input_path"]))
    log.info("Retrieved pfd files from %s" % (info["input_path"]))

    # Set up database connection
    log.debug("Connecting to TrapumDB...")
    try:
        trapum = trapum_db_score.TrapumDataBase("db_host_ip", "db_port",
                                                "db_name", "user", "passwd")
        t = trapum.connect()
    except:
        log.error("DB connection failed")
        pass
    log.debug("Connected to TrapumDB")

    # Get pipeline ID
    pipeline_id = trapum.get_pipeline_id_from_name("PICS_Original")

    # Create processing
    #submit_time = str(datetime.now())
    #processing_id = trapum.create_processing(pipeline_id, submit_time, "queued")

    # Get processing
    processing_id = info["processing_id"]

    # Create processing pivot entries
    dp_id = info["dp_id"]
    pp_id = trapum.create_pivot(dp_id, processing_id)

    # Start time update
    start_time = str(datetime.now())
    trapum.update_start_time(start_time, processing_id)

    AI_scores = classifier.report_score([pfdreader(f) for f in pfdfile])
    log.info("Scored with model %s" % info["model"])

    # Sort based on highest score
    pfdfile_sorted = [
        x for _, x in sorted(zip(AI_scores, pfdfile), reverse=True)
    ]
    AI_scores_sorted = sorted(AI_scores, reverse=True)
    log.info("Sorted scores..")

    text = '\n'.join([
        '%s %s' % (pfdfile_sorted[i], AI_scores_sorted[i])
        for i in range(len(pfdfile))
    ])
    fout = open('%s/pics_original_descending_scores.txt' % info["input_path"], 'w')
    fout.write(text)
    log.info("Written to file in %s" % info["input_path"])
    fout.close()

    # End time update
    end_time = str(datetime.now())
    trapum.update_end_time(end_time, processing_id)
    trapum.update_processing_status("Successful", processing_id)

    # Tar all files in this directory
    tar_name = os.path.basename(info["input_path"]) + "_presto_cands.tar"
    make_tarfile(info["input_path"], info["input_path"], tar_name)

    # Get pointing and beam id from one of the dp_ids
    try:
        pb_list = trapum.get_pb_from_dp(dp_id)
    except Exception as error:
        log.error(error)

    # Update dataproducts with PICS output entry and tarred files
    dp_id = trapum.create_secondary_dataproduct(
        pb_list[0], pb_list[1], processing_id, 1,
        "pics_original_descending_scores.txt", str(info["input_path"]), 12)
    dp_id = trapum.create_secondary_dataproduct(
        pb_list[0], pb_list[1], processing_id, 1,
        str(tar_name), str(info["input_path"]), 11)

    # Remove original files
    subprocess.check_call("rm *pfd* *.txt", shell=True,
                          cwd=str(info["input_path"]))
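# make_tarfile is referenced above but not defined in this snippet. A minimal
# sketch under the assumption that its signature is
# (output_path, source_dir, tar_name), matching the call above; this is a
# guess at the helper, not the pipeline's actual implementation:
import os
import tarfile

def make_tarfile(output_path, source_dir, tar_name):
    # Hypothetical helper: bundle the candidate directory into a tar archive.
    with tarfile.open(os.path.join(output_path, tar_name), "w") as tar:
        tar.add(source_dir, arcname=os.path.basename(source_dir))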
def classify_by_path(self):
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state('tmp/checkpoints/')
        new_saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path + '.meta')
        new_saver.restore(sess, ckpt.model_checkpoint_path)
        graph = tf.get_default_graph()
        fvp = graph.get_tensor_by_name('input1:0')
        tvp = graph.get_tensor_by_name('input2:0')
        dm = graph.get_tensor_by_name('input3:0')
        pro = graph.get_tensor_by_name('input4:0')
        rate = graph.get_tensor_by_name('rate:0')
        training = graph.get_tensor_by_name('is_training:0')
        y = graph.get_tensor_by_name('output:0')
        t_1 = time.time()
        # Skip pfds that already have a score in the result file
        with open(self.txt_file, 'r') as f:
            processed_pfds = np.genfromtxt(self.txt_file,
                                           dtype=[('fn', '|S200'), ('s', 'f')])
        processed_pfds_set = set(processed_pfds['fn'])
        print 'we already classified', len(processed_pfds_set), 'files'
        with open(self.txt_file, 'a') as f:
            allpfds = glob.glob(self.path + '*.pfd')
            pb = PB(maxValue=len(allpfds))
            allpfds_set = set(allpfds)
            to_process_set = allpfds_set - processed_pfds_set
            print len(to_process_set), 'files to be classified.'
            for i, pfd in enumerate(sorted(list(to_process_set))):
                apfd = pfdreader(pfd)
                # One getdata call returns all four features concatenated
                res = apfd.getdata(intervals=64, subbands=64,
                                   phasebins=64, DMbins=64)
                data_TvP = np.empty([1, 64, 64, 1])
                data_TvP[0, :, :, 0] = res[0:4096].reshape((64, 64))
                data_FvP = np.empty([1, 64, 64, 1])
                data_FvP[0, :, :, 0] = res[4096:8192].reshape((64, 64))
                data_profile = np.empty([1, 64, 1])
                data_profile[0, :, 0] = res[8192:8256]
                data_dmb = np.empty([1, 64, 1])
                data_dmb[0, :, 0] = res[8256:8320]
                result = sess.run(y, feed_dict={fvp: data_FvP,
                                                tvp: data_TvP,
                                                dm: data_dmb,
                                                pro: data_profile,
                                                rate: 0,
                                                training: False})
                proba = np.float32(result[0][1])
                f.write(pfd + ' ' + str(proba) + '\n')
                if i % 10 == 0:
                    pb(i)
        t_2 = time.time() - t_1
        print('Classifying complete in {:.0f} m {:.0f} s'.format(
            t_2 // 60, t_2 % 60))
import cPickle, glob, ubc_AI
from ubc_AI.data import pfdreader

AI_PATH = '/'.join(ubc_AI.__file__.split('/')[:-1])
classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_PALFA.pkl', 'rb'))
pfdfile = glob.glob('*.pfd') + glob.glob('*.ar') + glob.glob('*.ar2') + glob.glob('*.spd')
AI_scores = classifier.report_score([pfdreader(f) for f in pfdfile])
text = '\n'.join(['%s %s' % (pfdfile[i], AI_scores[i])
                  for i in range(len(pfdfile))])
fout = open('clfresult.txt', 'w')
fout.write(text)
fout.close()
import cPickle, glob, ubc_AI
from ubc_AI.data import pfdreader

AI_PATH = '/'.join(ubc_AI.__file__.split('/')[:-1])
classifier = cPickle.load(open(AI_PATH + '/trained_AI/clfl2_BD.pkl', 'rb'))
pfdfile = glob.glob('*.pfd')
print pfdfile
AI_scores = classifier.report_score([pfdreader(f) for f in pfdfile])
text = '\n'.join(
    ['%s %s' % (pfdfile[i], AI_scores[i]) for i in range(len(pfdfile))])
fout = open('clfresult.txt', 'w')
fout.write(text)
fout.close()
def write_record(save_floder, read_path):
    if not os.path.exists(save_floder):
        os.makedirs(save_floder)
    if not os.path.exists(read_path):
        print("File path is wrong; please set a valid reading path")
    # Collect pfd files; the directory name is the class label
    pfd_list = []
    labels = []
    for _, dirs, _ in os.walk(read_path):
        for dir in dirs:
            for _, _, files in os.walk(os.path.join(read_path, dir)):
                for file in files:
                    file_path = os.path.join(read_path, dir, file)
                    pfd_list.append(file_path)
                    labels.append(dir)
    # Shuffle before the 80/20 train/validation split
    shuffle_pfdlist = []
    shuffle_labels = []
    idx = np.random.permutation(len(labels))
    for i in idx:
        shuffle_pfdlist.append(pfd_list[i])
        shuffle_labels.append(labels[i])
    train_file_num = np.int(0.8 * len(shuffle_labels))
    n = 0
    train_record_path = save_floder + "train_pfd.tfrecords"
    valid_record_path = save_floder + "valid_pfd.tfrecords"
    train_pfd_writer = tf.python_io.TFRecordWriter(train_record_path)
    vaild_pfd_writer = tf.python_io.TFRecordWriter(valid_record_path)
    for pfd_file in shuffle_pfdlist:
        apfd = pfdreader(pfd_file)
        TvP = apfd.getdata(intervals=64).reshape(64, 64)
        data_TvP = np.empty([64, 64, 1])
        data_TvP[:, :, 0] = np.array(TvP)
        tvp_raw = data_TvP.tostring()
        FvP = apfd.getdata(subbands=64).reshape(64, 64)
        data_FvP = np.empty([64, 64, 1])
        data_FvP[:, :, 0] = np.array(FvP)
        fvp_raw = data_FvP.tostring()
        profile = apfd.getdata(phasebins=64)
        data_profile = np.empty([64, 1])
        data_profile[:, 0] = np.transpose(np.array(profile))
        prof_raw = data_profile.tostring()
        dmb = apfd.getdata(DMbins=64)
        data_dmb = np.empty([64, 1])
        data_dmb[:, 0] = np.transpose(np.array(dmb))
        dm_raw = data_dmb.tostring()
        raw_label = np.int64(shuffle_labels[n])
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'fvp': tf.train.Feature(bytes_list=tf.train.BytesList(value=[fvp_raw])),
                'tvp': tf.train.Feature(bytes_list=tf.train.BytesList(value=[tvp_raw])),
                'prof': tf.train.Feature(bytes_list=tf.train.BytesList(value=[prof_raw])),
                'dm': tf.train.Feature(bytes_list=tf.train.BytesList(value=[dm_raw])),
                'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[raw_label]))
            }))
        if n < train_file_num:
            train_pfd_writer.write(example.SerializeToString())
        else:
            vaild_pfd_writer.write(example.SerializeToString())
        n += 1
    train_pfd_writer.close()
    vaild_pfd_writer.close()
    print("Finished writing pfd data to TFRecord files!")
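# A minimal sketch of the matching reader for the records written above,
# assuming the TF1 tf.parse_single_example API; the function name is
# hypothetical. np.empty defaults to float64, so the raw bytes are decoded
# as tf.float64.
def parse_pfd_example(serialized):
    # Hypothetical parser mirroring the feature keys written by write_record.
    features = tf.parse_single_example(serialized, features={
        'fvp': tf.FixedLenFeature([], tf.string),
        'tvp': tf.FixedLenFeature([], tf.string),
        'prof': tf.FixedLenFeature([], tf.string),
        'dm': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    })
    fvp = tf.reshape(tf.decode_raw(features['fvp'], tf.float64), [64, 64, 1])
    tvp = tf.reshape(tf.decode_raw(features['tvp'], tf.float64), [64, 64, 1])
    prof = tf.reshape(tf.decode_raw(features['prof'], tf.float64), [64, 1])
    dm = tf.reshape(tf.decode_raw(features['dm'], tf.float64), [64, 1])
    return fvp, tvp, prof, dm, features['label']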