# Build/packaging helper: reads the project version from VERSION_TXT and
# provides utilities for collecting and renaming build artifacts.
import sys
import os
import zipfile
import fnmatch
import shutil

# Pull in build configuration (expected to define BUILD_PYTHON_FOLDER,
# VERSION_TXT, ...).  exec(open(...).read()) replaces the Python-2-only
# execfile() builtin and behaves the same on both Python 2 and Python 3.
exec(open("config.py").read())

sys.path.append(BUILD_PYTHON_FOLDER)
from VersionHandler import VersionHandler

version = VersionHandler(VERSION_TXT)
version.read()
datestr = version.datestr()
vstring = version.version()


def rename_and_move(file, target):
    """Copy *file* into directory *target*, renaming 'win64' to 'x64'.

    The destination file name is the basename of *file* with any 'win64'
    substring replaced by 'x64'.

    NOTE(review): the first parameter shadows the Python 2 builtin ``file``;
    the name is kept unchanged for backward compatibility with any callers
    that pass it as a keyword argument.
    """
    tfile = '%s/%s' % (target, os.path.basename(file))
    tfile = tfile.replace('win64', 'x64')
    # Parenthesized, single-argument print is valid on both Python 2 and 3.
    print("Copying %s to %s" % (file, tfile))
    shutil.copy(file, tfile)


def find_by_pattern(path, pattern):
    """Return paths of all files under *path* whose name matches glob *pattern*.

    Directories containing '_CPack_Packages' (CPack staging trees) are
    skipped entirely, and the compiler-generated 'vc110.pdb' is excluded
    since it is not a distributable artifact.
    """
    matches = []
    for root, dirnames, filenames in os.walk(path):
        # Guard clause: ignore CPack's temporary packaging directories.
        if '_CPack_Packages' in root:
            continue
        for filename in fnmatch.filter(filenames, pattern):
            if filename == "vc110.pdb":
                continue
            matches.append(os.path.join(root, filename))
    return matches
# NOTE(review): collapsed fragment of an optparse-based version-management CLI.
# It references a ``parser`` object created before this excerpt and ends with a
# dangling ``if options.targetHPP:`` whose body lies past the visible region,
# so the code is kept byte-identical; only this note is added.  Behavior (from
# the visible code): -c creates a version file, -u reads/increments/touches/
# writes it, a bare -f only reads it, otherwise the CLI help is printed.
parser.add_option("-f", "--file", dest="filename", help="File to update version in", metavar="FILE") parser.add_option("-g", "--generate-hpp", dest="targetHPP", help="Generate a HPP file") parser.add_option("-c", "--create", action="store_true", dest="create", help="Create a new file") parser.add_option("-d", "--display", action="store_true", dest="display", help="Display the current version") parser.add_option("-u", "--update", dest="update", help="Update a file (major, minor, revision, build)") (options, args) = parser.parse_args() version = None if options.filename and options.create: version = VersionHandler(options.filename) version.create() elif options.filename and options.update: version = VersionHandler(options.filename) version.read() version.increment(options.update) version.touch() version.write() elif options.filename: version = VersionHandler(options.filename) version.read() else: parser.print_help() if version: if options.targetHPP:
### Tokenize, remove stopwords, save the result ###
# Earlier preprocessing experiments, kept for reference:
# preprocesser = Preprocesser()
# preprocesser.tokenize(corpus, remove_stopwords=False)
# corpus_tokenized = preprocesser.corpus_tokenized
# pickle.dump(corpus_tokenized, open('resources/corpus_300k_filtered_tokenized_with_stopwords_cs.c', 'wb'))
# save_file(corpus_tokenized, "corpus_300k_filtered_tokenized_with_stopwords_cs")
# save_file(corpus_tokenized, "corpus_10k_test")

# Load the pre-tokenized corpus (stopwords included) from its pickled form.
# NOTE(review): pickle.load on an on-disk file — only safe for trusted data.
corpus_tokenized = pickle.load(
    open(
        "/home/nsaef/projects/CollectionExplorer/web/CollectionExplorer/static/CollectionExplorer/corpora/12/12_tokens_stopwords-included_cs.corpus",
        "rb",
    )
)

##### Versioning and Duplicates #####
# Hash each tokenized document, then compare hashes pairwise to collect
# candidate document versions/duplicates.
version_handler = VersionHandler()
version_handler.calc_hashes(corpus_tokenized)
candidates = version_handler.calculate_similarities()

##### Topic Modelling #####
# Earlier LDA experiments, kept for reference:
# ### Vectorize the corpus using raw frequencies for lda ###
# processer_rf = Preprocesser()
# corpus_rf = processer_rf.vectorize_frequencies(corpus)
# feature_names = processer_rf.feature_names_raw
# ### Create topic models using LDA ###
# lda = TopicModeller(n_topics=30)
# lda.create_topic_models(corpus_rf, feature_names)
# topics = lda.documents_per_topic(corpus_rf, corpus)
# lda.print_top_words(feature_names, n_top_words=20, collection=topics)
# NOTE(review): collapsed fragment duplicating part of the version-CLI chunk.
# It begins mid-way through a ``parser.add_option(...)`` call (the opening of
# that call is outside the visible region) and ends with a dangling
# ``if options.targetHPP:`` whose body is also not visible, so the code is
# kept byte-identical; only this note is added.
dest="create", help="Create a new file") parser.add_option("-d", "--display", action="store_true", dest="display", help="Display the current version") parser.add_option("-u", "--update", dest="update", help="Update a file (major, minor, revision, build)") (options, args) = parser.parse_args() version = None if options.filename and options.create: version = VersionHandler(options.filename) version.create() elif options.filename and options.update: version = VersionHandler(options.filename) version.read() version.increment(options.update) version.touch() version.write() elif options.filename: version = VersionHandler(options.filename) version.read() else: parser.print_help() if version: if options.targetHPP: