Example 1
def read_corpus_word2vec():
    info = utils.read_info("../info.tsv")
    # The Punkt tokenizer divides a text into a list of sentences by using an
    # unsupervised algorithm to build a model for abbreviation words,
    # collocations, and words that start sentences. It must be trained on a
    # large collection of plaintext in the target language before it can be
    # used; the NLTK data package includes a pre-trained Punkt tokenizer for
    # English.
    sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
    sentences = []
    NUM_DOCS = len(info)  # number of documents in the collection
    for indDoc in range(NUM_DOCS):
        # info[indDoc][0] holds the path of the document to read
        with open(info[indDoc][0]) as doc_file:
            text = doc_file.read()
        raw_sentences = sent_detector.tokenize(text.strip())
        for raw_sentence in raw_sentences:
            if len(raw_sentence) > 0:
                sentences.append(sentence_process(raw_sentence))

        sys.stdout.write("\rReading collection: %d%%" % (indDoc * 100 // NUM_DOCS))
        sys.stdout.flush()
    return sentences
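A minimal usage sketch, not part of the original snippet: the returned sentence list can be fed to a word2vec trainer. The gensim import, the hyperparameters, and the expectation that sentence_process() yields a token list per sentence are all assumptions here (gensim >= 4 API).

# Hypothetical follow-up (assumption: gensim >= 4 is the word2vec backend and
# sentence_process() returns a list of tokens per sentence).
from gensim.models import Word2Vec

sentences = read_corpus_word2vec()
model = Word2Vec(sentences, vector_size=100, window=5, min_count=2, workers=4)
model.wv.save_word2vec_format("corpus_vectors.txt")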
Example 2
def set_input(folders_matrix):
    Ts = read_stru.get_T()
    input_dict = read_stru.get_input_dict()
    Ecut = float(input_dict["ecutwfc"])
    info = utils.read_info()
    input = {
        "file_list": [
            f"{utils.folder_opt_matrix}/" + folder_matrix
            for folder_matrix in folders_matrix
        ],
        "info": {
            "Nt_all": Ts,
            "Nu": {T: info["Nu"]
                   for T in Ts},
            "Nb_true": [nbands for weight, nbands in folders_matrix.values()],
            "weight": [weight for weight, nbands in folders_matrix.values()],
            "Rcut": read_stru.get_Rcut(),
            "dr": {T: utils.dr
                   for T in Ts},
            "Ecut": {T: Ecut
                     for T in Ts},
            "lr": utils.lr
        },
        "C_init_info": {
            "init_from_file": False
        },
        "V_info": {
            "init_from_file": True,
            "same_band": False
        }
    }
    return json.dumps(input, indent=4)
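For illustration only: judging from how set_input() iterates its argument, folders_matrix is expected to map a matrix-folder name to a (weight, nbands) pair. The folder names, numbers, and output file name below are made up.

# Hypothetical call (assumption: folders_matrix maps folder name -> (weight, nbands),
# matching the comprehensions over folders_matrix and folders_matrix.values() above).
folders_matrix = {
    "matrix_0": (1.0, 8),
    "matrix_1": (0.5, 12),
}
with open("input.json", "w") as file:  # the output file name is an assumption
    file.write(set_input(folders_matrix))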
Example 3
def read_basic_info(self):
    # Read basic audio metadata (sample rate, channels, sample width, length).
    self.original_sr, self.nchannels, self.sampwidth, self.length = utils.read_info(
        self.path)
    self.md5 = utils.binaryMD5(self.path)
    self.duration = (float(self.length) /
                     float(self.original_sr)) / self.timeexp
    self.filesize = utils.media_size(self.path)
    self.sr = self.original_sr
    self.mask = None
    self.signal = None
Example 4
def get_dis_opt(dis):
    opt_mode = "kmeans"
    dis_opt = dict()
    info = utils.read_info()
    for T1, T2 in dis:
        dis_tmp = read_stru.delete_zero(dis[T1, T2])
        if len(dis_tmp) <= info["dimer_num"]:
            # Few enough distinct distances: keep them all.
            dis_opt[T1, T2] = list(dis_tmp.keys())
        else:
            if opt_mode == "linspace":
                dis_opt[T1, T2] = list(
                    np.linspace(min(dis_tmp), max(dis_tmp), info["dimer_num"]))
            elif opt_mode == "kmeans":
                # Cluster the distances, weighting each one by its count divided by
                # the squared distance, and keep the cluster centers as representatives.
                kmeans = KMeans(n_clusters=info["dimer_num"])
                label = kmeans.fit_predict(
                    np.array(list(dis_tmp.keys())).reshape(-1, 1),
                    sample_weight=[num / i_dis**2 for i_dis, num in dis_tmp.items()])
                dis_opt[T1, T2] = list(kmeans.cluster_centers_.reshape(-1))
                pprint.pprint(dict(zip(dis_tmp.keys(), label)))
        if T1 == T2:
            dis_opt[T1, T2].append(0.0)
    return dis_opt
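For illustration only: get_dis_opt() treats dis as a dict keyed by species pairs, each value mapping a dimer distance to its occurrence count (used as a sample weight). The species and numbers below are invented.

# Hypothetical input (assumption: dis[(T1, T2)] maps distance -> count, matching
# the (i_dis, num) unpacking of dis_tmp.items() above).
dis = {
    ("Si", "Si"): {2.35: 4, 3.84: 12, 4.50: 6},
    ("Si", "O"): {1.61: 8, 3.10: 10},
}
dis_opt = get_dis_opt(dis)  # e.g. {("Si", "Si"): [..., 0.0], ("Si", "O"): [...]}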
Example 5
def cal(input):
    # Write the orbital-optimization input, generate a scheduler-specific
    # submission script, and submit the job.
    info = utils.read_info()
    with open(f"{utils.folder_opt}/input.json", "w") as file:
        file.write(input)
    if utils.sub == "qsub":
        with open(f"{utils.folder_opt}/sub.sh", "w") as file:
            file.write(
                textwrap.dedent(f"""\
				#!/bin/bash
				#PBS -q batch
				#PBS -l nodes=1:ppn=1
				#PBS -l walltime=2:00:00
				#PBS -o job.log
				#PBS -e job.err
				ulimit -s unlimited
				cd $PBS_O_WORKDIR
				export OMP_NUM_THREADS=1
				EXEC={info["opt_orb"]}
				python3 -u $EXEC
				"""))
    elif utils.sub == "tianhe2":
        with open(f"{utils.folder_opt}/sub.sh", "w") as file:
            file.write(
                textwrap.dedent(f"""\
				#!/bin/bash
				EXEC={info["opt_orb"]}
				python3 -u $EXEC >Log.txt
				"""))
    os.chdir(utils.folder_opt)
    if utils.sub == "qsub":
        os.system("qsub sub.sh")
    elif utils.sub == "tianhe2":
        os.system("yhbatch -N 1 sub.sh")


#	os.system(f'python3 -u {info["opt_orb"]}')
    os.chdir("../")
Example 6
def cal_ABACUS(T1, T2, i_dis):
    # Set up an ABACUS dimer calculation (INPUT, STRU, KPT) for species T1 and T2
    # at separation i_dis, then submit it with the configured scheduler.
    folder = pathlib.Path(utils.folder_name(T1, T2, i_dis)).resolve()
    folder.mkdir(parents=True, exist_ok=False)

    with open(folder / "INPUT", "w") as file:
        info = utils.read_info()
        input_dict = read_stru.get_input_dict()
        input_dict["ntype"] = 1 if T1 == T2 else 2
        input_dict["exx_hybrid_type"] = 'opt_orb'
        input_dict["nbands"] = (read_stru.get_nw()[T1] if abs(i_dis) < 1E-10
                                else read_stru.get_nw()[T1] +
                                read_stru.get_nw()[T2])
        input_dict["nspin"] = 1
        input_dict["gamma_only"] = 1
        input_dict["pseudo_dir"] = os.path.abspath(
            input_dict.get("pseudo_dir", r"./"))
        input_dict["exx_opt_orb_lmax"] = len(info["Nu"]) - 1
        read_stru.print_input(file, input_dict)

    with open(folder / "STRU", "w") as file:
        Ts = (T1, ) if T1 == T2 else (T1, T2)
        file.write("ATOMIC_SPECIES\n")
        pseudo_path = read_stru.get_pseudo_path()
        for T in Ts:
            file.write(f"{T}	1	{pseudo_path[T]}\n")
        file.write("\nNUMERICAL_ORBITAL\n")
        lcao_path = read_stru.get_lcao_path()
        for T in Ts:
            file.write(f"{lcao_path[T]}\n")
        file.write(
            textwrap.dedent(f"""
			LATTICE_CONSTANT
			1\n
			LATTICE_VECTORS
			30 0 0
			0 30 0
			0 0 30\n
			ATOMIC_POSITIONS
			Cartesian
			"""))
        if T1 == T2:
            if abs(i_dis) < 1E-10:
                file.write(
                    textwrap.dedent(f"""
					{T1}
					0
					1
					0 0 0 0 0 0
					"""))
            else:
                file.write(
                    textwrap.dedent(f"""
					{T1}
					0
					2
					0 0 0 0 0 0
					{i_dis} 0 0 0 0 0
					"""))
        else:
            file.write(
                textwrap.dedent(f"""
				{T1}
				0
				1
				0 0 0 0 0 0\n
				{T2}
				0
				1
				{i_dis} 0 0 0 0 0
				"""))

    with open(folder / "KPT", "w") as file:
        file.write(
            textwrap.dedent(f"""\
			K_POINTS
			0
			Gamma
			1 1 1 0 0 0
			"""))

    info = utils.read_info()
    if utils.sub == "qsub":
        with open(folder / "sub.sh", "w") as file:
            file.write(
                textwrap.dedent(f"""\
				#!/bin/bash
				#PBS -q gold5120
				#PBS -l nodes=1:ppn=1
				#PBS -l walltime=1:00:00
				#PBS -o job.log
				#PBS -e job.err
				ulimit -s unlimited
				cd $PBS_O_WORKDIR
				EXEC={info["ABACUS"]}
				mpirun -n 1 -env OMP_NUM_THREADS=1 $EXEC
				"""))
    elif utils.sub == "bsub":
        with open(folder / "sub.sh", "w") as file:
            file.write(
                textwrap.dedent(f"""\
				#!/bin/sh
				#BSUB -q renxg
				#BSUB -o job.log -e job.err
				#BSUB -n 1
				EXEC={info["ABACUS"]}
				mpirun -n 1 -env OMP_NUM_THREADS=1 $EXEC
				"""))

    os.chdir(folder)
    if utils.sub == "qsub":
        os.system("qsub sub.sh")
    elif utils.sub == "bsub":
        os.system("bsub < sub.sh")
    elif utils.sub == "tianh2":
        os.system(f'yhrun -n 1 -c 1 {info["ABACUS"]} >Log.txt')
    os.chdir("../")
Example 7
def cal():
    # Set up the EXX run: copy INPUT/KPT, rewrite INPUT, build a STRU that
    # references the optimized orbitals, then submit the job.
    pathlib.Path(utils.folder_exx).mkdir(parents=True, exist_ok=False)

    os.system(f"cp INPUT {utils.folder_exx}/")
    os.system(f"cp KPT {utils.folder_exx}/")

    with open(f"{utils.folder_exx}/INPUT", "w") as file:
        input_dict = read_stru.get_input_dict()
        input_dict["pseudo_dir"] = os.path.abspath(
            input_dict.get("pseudo_dir", r"./"))
        read_stru.print_input(file, input_dict, 1)

    with open("STRU", "r") as file:
        strus = re.compile("LATTICE_CONSTANT").split(file.read())
    with open(f"{utils.folder_exx}/STRU", "w") as file:
        Ts = read_stru.get_T()
        file.write("ATOMIC_SPECIES\n")
        pseudo_path = read_stru.get_pseudo_path()
        for T in Ts:
            file.write(f"{T}	12	{pseudo_path[T]}\n")
        file.write("\nNUMERICAL_ORBITAL\n")
        lcao_path = read_stru.get_lcao_path()
        for T in Ts:
            file.write(f"{lcao_path[T]}\n")
        file.write("\nABFS_ORBITAL\n")
        for T in read_stru.get_T():
            file.write(f"../{utils.folder_opt}/orb_{T}.dat\n")
        file.write("\nLATTICE_CONSTANT")
        file.write(strus[1])

    info = utils.read_info()
    if utils.sub == "qsub":
        with open(f"{utils.folder_exx}/sub.sh", "w") as file:
            file.write(
                textwrap.dedent(f"""\
				#!/bin/bash
				#PBS -q gold5120
				#PBS -l nodes=2:ppn=28
				#PBS -l walltime=99:99:99
				#PBS -o job.log
				#PBS -e job.err
				ulimit -s unlimited
				cd $PBS_O_WORKDIR
				EXEC={info["ABACUS"]}
				mpirun -n 2 -env OMP_NUM_THREADS=28 $EXEC
				"""))
    elif utils.sub == "bsub":
        with open(f"{utils.folder_exx}/sub.sh", "w") as file:
            file.write(
                textwrap.dedent(f"""\
				#!/bin/sh
				#BSUB -q renxg
				#BSUB -o job.log -e job.err
				#BSUB -n 6
				mpirun -n 2 -env OMP_NUM_THREADS=28 {info['ABACUS']}
				"""))

    os.chdir(utils.folder_exx)
    if utils.sub == "qsub":
        os.system("qsub sub.sh")
    elif utils.sub == "bsub":
        os.system(f"bsub < sub.sh")
    elif utils.sub == "tianhe2":
        os.system(
            f'yhrun -N 1 -n 1 -c 24 -t 1440 {info["ABACUS"]} >Log.txt 2>&1 &')
    else:
        raise ValueError("utils.sub")
    os.chdir("../")