Example #1
    def _create_path(self):
        """Return the path to the desired model outputs.

        Uses these instance attributes:
        base_path -- the path to the parent directory
        resolution -- the model resolution
        variable -- the desired physical variable
        time_scale -- options are 3hourly, 6hourly, daily, monthly, timestep
        simulation -- options are xgxqe, xgxqf, xgxqg, xgxqh, xgxqi, xgxqj, xgxqy
        """

        dic_of_paths = load_dictionary()
        # Collect every key whose first value entry matches the requested variable.
        matching_keys = [
            key for key, value in dic_of_paths.items()
            if value[0] == self.variable
        ]
        for key in matching_keys:
            path = os.path.join(self.base_path, self.resolution,
                                self.time_scale, key, self.simulation)
            print('Trying path ' + path)
            if os.path.isdir(path):
                self.path = path
                return path
        raise FileNotFoundError('Could not find a matching directory')
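None of these examples show `load_dictionary` itself, and each project likely has its own implementation. A minimal pickle-backed sketch (the signature and default file name are assumptions, patterned after Example #4 below) could look like this:

import pickle

def load_dictionary(path='dictionary.pkl'):
    # Hypothetical implementation: unpickle and return the stored dict.
    # The default file name is an assumption borrowed from Example #4.
    with open(path, 'rb') as f:
        return pickle.load(f)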
Example #2
def display_variables():
    dic_of_paths = load_dictionary()

    # The variable name is stored as the first entry of each value.
    variables = [value[0] for value in dic_of_paths.values()]
    print(variables)
    return variables
Example #3
def japanese_dictionary_reader():
	# Assumes module-level imports (bz2, os, subprocess, xml.etree.ElementTree as ET)
	# and the project helpers download() and parse_jawiktionary_word().
	dictionary_path = download(
		"https://dumps.wikimedia.org/jawiktionary/latest/jawiktionary-latest-pages-articles-multistream.xml.bz2",
		"jawiktionary.xml.bz2"
	)
	# Wikimedia dumps open with a namespaced root tag (<mediawiki xmlns=...>); if so,
	# repack the file with a bare <mediawiki> root so iterparse yields plain tag names.
	with bz2.open(dictionary_path, 'rt') as f:
		repack = f.readline().strip() != '<mediawiki>'

	if repack:
		print("Repacking jawiktionary")
		subprocess.check_call(["bash", "-c", f"cat <(echo '<mediawiki>') <(bunzip2 -c {dictionary_path} | tail -n +2) "
				"| bzip2 > tmp/jawiktionary-repack.xml.bz2"])
		os.rename("tmp/jawiktionary-repack.xml.bz2", dictionary_path)

	dictionary.load_dictionary()

	try:
		source = bz2.open(dictionary_path, 'rt')
		for _, elem in ET.iterparse(source):
			if elem.tag == 'page':
				# Namespace 0 holds the actual articles; skip talk/meta pages.
				if elem.find("ns").text != '0':
					elem.clear()
					continue

				entries = parse_jawiktionary_word(elem.find('title').text, elem.find("revision").find("text").text)
				if entries is not None:
					yield from entries
				# Clear handled elements so the parse stays memory-bounded.
				elem.clear()

			elif elem.tag == 'mediawiki':
				elem.clear()
	except BaseException:
		# On any failure, dump the heading counters collected so far, then re-raise.
		print(f"\nHeadings:\n{pos_headings}\n{other_headings}\n")
		raise

	print(f"Headings:\n{pos_headings}\n{other_headings}")
Example #4
from tensorflow import keras
import pandas as pd
import numpy as np
import os
from dictionary import load_dictionary
from preprocess import encodeData, encodeKeyAndLoc

maxLen = 150

test_data = pd.read_csv(os.getcwd() + "/NLP with Disaster Tweets/data/test.csv", sep=",")
ids = np.array(test_data['id']).reshape(-1, 1)
X_test = np.array(test_data)

textDict = load_dictionary('dictionary.pkl')
keyDict = load_dictionary('keyDict.pkl')
locDict = load_dictionary('locDict.pkl')

textInput = np.asarray(encodeData(X_test[:,3], textDict, maxLen), dtype=np.float32)
keyInput = encodeKeyAndLoc(X_test[:,1], keyDict)
locInput = encodeKeyAndLoc(X_test[:,2], locDict)

keyInput = np.expand_dims(np.array(keyInput), axis=1)
locInput = np.expand_dims(np.array(locInput), axis=1)
keyAndLocInput = np.asarray(np.concatenate((keyInput, locInput), axis=1), dtype=np.float32)

model = keras.models.load_model(os.getcwd() + '/NLP with Disaster Tweets/model.h5')

predictions = model.predict([textInput, keyAndLocInput])

# The source snippet is truncated here; printing each id with its predicted
# score is a minimal completion (the original loop body is unknown).
for i in range(len(predictions)):
    print(ids[i][0], predictions[i][0])
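If the intended output is a Kaggle-style submission file for the Disaster Tweets competition (an assumption; the original loop body is cut off), the predictions could instead be thresholded and written out in one step:

# Assumptions: sigmoid outputs of shape (n, 1); 'id' and 'target' column names
# follow the usual Kaggle submission convention.
labels = (predictions[:, 0] > 0.5).astype(int)
submission = pd.DataFrame({'id': ids[:, 0], 'target': labels})
submission.to_csv('submission.csv', index=False)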
Example #5
                    walkaways['occurence'][buffer_start_value] = context

        if 'occurence' in walkaways:
            walk_aways.append(walkaways)
            log_walkaways(walkaways)

        bar.update(processed_files_number)
        processed_files_number += 1
    return walk_aways


if __name__ == '__main__':
    movie_dictionary = {}
    try:
        logging.info('Loading dictionary...')
        movie_dictionary = dictionary.load_dictionary()
    except FileNotFoundError:
        logging.info('Building dictionary...')
        movie_dictionary = dictionary.build_dictionary()

    logging.info('Reading...')
    walk_aways = indentify_walk_aways(movie_dictionary)

    with open('data/%s-%s.pkl' % (WALKAWAY_DEFAULT_FILE, datetime.datetime.now()),
              'wb+') as output:
        pickle.dump(walk_aways, output, pickle.HIGHEST_PROTOCOL)

    with open(
            'data/%s-%s.json' %