def perform_coverage_delete(self, files):
    # perform coverage delete
    cleaner = Cleaner('pass # pragma: no cover\n')
    for filename in files.keys():
        content = self.read(filename)
        content = cleaner.clean(content, files[filename])
        self.write(filename, content)
class Tips(object):
    """ Manage Tips Events. """

    def __init__(self, enable):
        self.enable = enable
        self._tips = {}
        self._new_tips = set()
        self.lock = Lock()
        if self.enable:
            self.fetcher = Fetcher(self._tips, self.lock, self._new_tips)
            self.cleaner = Cleaner(self._tips, self.lock, self._new_tips)
            self.fetcher.start()
            self.cleaner.start()

    def tips(self):
        return self._tips.values()

    def new_tips(self):
        if self._new_tips:
            wait_free_acquire(self.lock)
            res = [self._tips[x] for x in self._new_tips]
            self._new_tips.clear()
            self.lock.release()
            return res
        else:
            return []

    def stop(self):
        if self.enable:
            self.fetcher.finnish()
            self.cleaner.finnish()
def __init__(self):
    self.lda_model = None
    self.dictionary = None
    self.n_topic = None
    self.n_passes = None
    self.cleaner = Cleaner()
    self.folder_name = "models"
def play(self):
    ''' '''
    Cleaner.clean_wait_begin(self, self.check_begin)
    self.init_slot_descriptions()
    # change the picture
    self.background_wait_begin = Picture(self.app,
                                         image=PATH_RESSOURCES + "background_play.jpg",
                                         grid=[0, 0, 40, 40])
    response = str(
        requests.get(BASE_URL + '/play/round/1/hand/' + self.token).content)[2:-1]
    datas = json.loads(response)
    self.cards = []
    idx_pos = 1
    for data in datas:
        self.cards.append(
            PushButton(self.app,
                       image=PATH_RESSOURCES + "cards_min/card_" + data["image"] + "_min.png",
                       command=self.show_card,
                       args=[data],
                       grid=[idx_pos, 35]))
        idx_pos += 1
def main(endpoint_models: list):
    """
    Loop over endpoint models, search the file directory for endpoints
    and compare each to a standard endpoint config.
    """
    audit_list = list()
    for model in tqdm(endpoint_models, desc="Looping over endpoint models..."):
        # first get the gold standard config file
        standard_config_file = get_standard_config(model)
        # now open that standard file
        with open(standard_config_file, 'r') as standard_config:
            standard_config_json = json.load(standard_config)
        audit = Parser(standard_config_json)
        # gather endpoint filenames
        endpoint_config_files = gather_endpoints(model)
        for endpoint in tqdm(endpoint_config_files,
                             desc="Looping over endpoint config files..."):
            with open(endpoint, 'r') as endpoint_file:
                endpoint_json = json.load(endpoint_file)
            config_diff = audit.compare(endpoint_json, endpoint.name)
            cleaner = Cleaner(config_diff)
            cleaned = cleaner.clean()
            audit_list.append({f"{model}": cleaned})
    return audit_list
def test_metadata(self):
    e = Cleaner('./tests/data/models/meta_2200_model.pkl')
    e2 = Cleaner('./tests/data/models/72000_model.pkl')
    metadata = e.metadata()
    no_metadata = e2.metadata()
    self.assertTrue(metadata)
    self.assertFalse(no_metadata)
class TopicModeler:
    def __init__(self, **kwargs):
        '''
        kwargs::
            class gensim.models.ldamodel.LdaModel(corpus=None, num_topics=100,
                id2word=None, distributed=False, chunksize=2000, passes=1,
                update_every=1, alpha='symmetric', eta=None, decay=0.5,
                offset=1.0, eval_every=10, iterations=50, gamma_threshold=0.001,
                minimum_probability=0.01, random_state=None, ns_conf=None,
                minimum_phi_value=0.01, per_word_topics=False, callbacks=None,
                dtype=<class 'numpy.float32'>)
        '''
        self.cleaner = Cleaner()
        self.lda_model = None
        self.lda_kwargs = kwargs
        self.dictionary = None

    def update(self, docs):
        cleaned = [list(self.cleaner.clean(doc)) for doc in docs]
        self.dictionary = corpora.Dictionary(cleaned)
        corpus = [self.dictionary.doc2bow(text) for text in cleaned]
        if self.lda_model is None:
            self.lda_model = models.ldamodel.LdaModel(corpus,
                                                      id2word=self.dictionary,
                                                      **self.lda_kwargs)
        else:
            self.lda_model.update(corpus, id2word=self.dictionary)

    def classify(self, doc):
        bow = self.dictionary.doc2bow(list(self.cleaner.clean(doc)))
        topic = max(self.lda_model.get_document_topics(bow),
                    key=lambda x: x[1])[0]
        return self.lda_model.show_topic(topic)

    def print_topics(self):
        print(self.lda_model.print_topics(num_topics=10, num_words=3))
def format_text(self, text, document, formatting):
    par_len = 150
    applicator = Applicator()
    cleaner = Cleaner()
    formatter = Formatter()
    new_text = self.get_string(text)
    cleaner.create_sentc_list(new_text)
    sentc_list = cleaner.get_sentc_list()
    formatter.set_sentlist(sentc_list)
    if formatting == "list":
        formatter.frmt_textlist()
    elif formatting == "block":
        formatter.frmt_textblock(par_len=par_len)
    elif formatting == "string":
        formatter.frmt_textstring()
    else:
        print("format not supported")
        raise SystemExit
    format_text = formatter.get_text()
    applicator.apply_text(format_text, document=document)
def faction(self):
    ''' '''
    Cleaner.clean_wait_player(self, self.check_players)
    self.background_faction = Picture(self.app,
                                      image=PATH_RESSOURCES + "background_faction.png",
                                      grid=[0, 0, 20, 20])
    self.button_faction_1 = PushButton(self.app,
                                       image=PATH_RESSOURCES + "faction1.png",
                                       command=self.choose_faction,
                                       args=[1],
                                       grid=[7, 12])
    self.button_faction_2 = PushButton(self.app,
                                       image=PATH_RESSOURCES + "faction2.png",
                                       command=self.choose_faction,
                                       args=[2],
                                       grid=[8, 12])
    self.button_faction_3 = PushButton(self.app,
                                       image=PATH_RESSOURCES + "faction3.png",
                                       command=self.choose_faction,
                                       args=[3],
                                       grid=[9, 12])
    self.button_faction_4 = PushButton(self.app,
                                       image=PATH_RESSOURCES + "faction4.png",
                                       command=self.choose_faction,
                                       args=[4],
                                       grid=[10, 12])
def show_card(self, card):
    ''' '''
    if self.card_img_big is not None:
        Cleaner.clean_slot_descriptions(self)
    self.card_img_big = Picture(self.app,
                                image=PATH_RESSOURCES + "cards/card_" + card["image"] + ".png",
                                grid=[30, 5, 7, 19])
    self.name_card = Text(self.app,
                          text=card["name"],
                          font="Impact",
                          color="white",
                          size=20,
                          align="left",
                          grid=[30, 25, 10, 1])
    self.description_card = Text(self.app,
                                 text="\"" + self.cut_line(card["description"]) + "\"",
                                 font="Impact",
                                 color="white",
                                 size=18,
                                 align="left",
                                 grid=[30, 27, 10, 3])
    self.description_power_card = Text(self.app,
                                       text=self.cut_line(card["description_power"]),
                                       font="Impact",
                                       color="white",
                                       size=18,
                                       align="left",
                                       grid=[30, 31, 10, 3])
def __init__(self):
    self.initLogger()
    self.examiner = Examiner()
    self.sec = Secretary()
    self.clr = Cleaner()
    self.login()
    self.init()
def test_to_submission_format(self):
    e = Cleaner('./tests/data/models/ae3_213750_model.pkl')
    img = '../data/test/10.png'
    img, id = e.clean(img)
    csv = e.to_submission_format(img, id)
    row = csv[300].split(',')
    self.assertEqual(row[0], '%s_%d_%d' % (id, 1, 301))
    self.assertTrue(float(row[1]) <= 1.0)
def __init__(self):
    self.initLogger()
    self.examiner = Examiner()
    self.sec = Secretary()
    self.clr = Cleaner()
    self.questionsDb = QuestionsDb('XFQuestionsLib.db')
    self.login()
    self.init()
def do(task):
    logging.debug("Start doing task: %s" % task)
    cleaner = Cleaner()
    try:
        return cleaner.clean(task)
    except:
        traceback.print_exc(file=sys.stderr)
        logging.critical('Failed while cleaning for task %s' % (task['ID']))
        return False
def build(self):
    '''build the database'''
    reddit = Reddit()
    cleaner = Cleaner()
    for subreddit in reddit.get_subreddits():
        for post in reddit.get_posts(subreddit):
            self.database.insert(cleaner.clean(post))
            for comment in reddit.get_comments(post):
                self.database.insert(cleaner.clean(comment))
def __init__(self):
    self.__crawler = Crawler()
    self.__cleaner = Cleaner()
    self.__file_manager = FileManager()
    self.__search_engine = GoogleSearch(config.SEARCH_TOPIC,
                                        config.MAX_ITEM,
                                        config.NUMBER_OF_RESULTS_PER_PAGE,
                                        config.PAUSE_BTW_REQUEST)
    self.__csf_manager = CSFManager()
def __init__(self):
    self.config_parser = ConfigParser()
    self.out_dir = os.path.join(os.path.dirname(__file__), "backups/")
    self.create_empty_dir()
    self.backup_name = "%s-%s.zip" % (os.getlogin(), time.strftime("%d-%m-%Y"))
    self.directorys = self.config_parser.directories_to_backup
    self.path = os.path.join(self.out_dir, self.backup_name)
    self.zip_creator = ZipCreator(self.path, self.directorys)
    self.drive_connector = DriveConnector(self.out_dir, self.config_parser)
    self.cleaner = Cleaner(self.out_dir, self.config_parser.get_clean_time())
def wait_begin(self):
    ''' '''
    Cleaner.clean_choose_faction(self)
    # change the picture
    self.background_wait_begin = Picture(self.app,
                                         image=PATH_RESSOURCES + "background_waiting.png",
                                         grid=[0, 0, 20, 20])
    self.background_wait_begin.repeat(1000, self.check_begin)
def test_cleaner_erase_given_data(self):
    """
    Checks if it is possible to delete a file

    Notes: 58 is hard-coded. DO NOT CHANGE
    """
    file = open('plushkin1', 'w+')
    file.close()
    file = open('plushkin2', 'w+')
    file.close()
    cc = Cleaner(['plushkin1', 'plushkin2'], 0)
    self.assertEqual(cc.clean_and_report(), ([], 1, 58, 0))
def collect_all_data(cls, df):
    judges = df.copy()
    for plaintext in judges.plain_text:
        top_split, bottom_split = Cleaner.splitter(plaintext)
        bad_names = Cleaner._unclean_names(top_split)
        good_names = Cleaner._clean_names(bad_names)
        author = Cleaner._clean_author(bottom_split)
        yield list(good_names), author
def cleanupTask(task):
    # cleanup task at hand
    # ----------------------------------------------------------------------------------------------
    # Get all parameters for the production
    # ----------------------------------------------------------------------------------------------
    cleaner = Cleaner(task)
    cleaner.logCleanup()
    print ''
    return
def display_db(self):
    '''
    ***not meant to stay***
    display everything in database
    '''
    cleaner = Cleaner()
    for post in self.database.posts.find():
        loaded = cleaner.load(post)
        pprint.pprint(loaded.data)
    for comment in self.database.comments.find():
        loaded = cleaner.load(comment)
        pprint.pprint(loaded.data)
def performance_modelo(dataset, model):
    Sentenca = dataset.iloc[:, 1]
    Intencoes = dataset.iloc[:, 0]
    cleaner = Cleaner()
    Sentenca_cleaned = [cleaner.clean_text(x) for x in Sentenca]
    Sentenca_counts = CountVectorizer().fit_transform(Sentenca_cleaned)
    X_train, X_test, y_train, y_test = train_test_split(Sentenca_counts,
                                                        Intencoes,
                                                        test_size=0.15,
                                                        random_state=1)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy_score_model = accuracy_score(y_test, y_pred)
    return print(f"Model accuracy is {accuracy_score_model*100:.1f}%")
def __init__(self):
    """
    Description
    -----------
    Sets our default variable path.
    """
    self.dir_to_watch = os.getenv("DIR_TO_WATCH")
    self.destination_dir = os.getenv("DESTINATION_DIR")
    self.logger = Logger()
    self.logger.write(f'Automated Maid', figlet=True)
    self.cleaner = Cleaner()
def main(dataset, model):
    Sentenca = dataset.iloc[:, 1]
    Intencoes = dataset.iloc[:, 0]
    cleaner = Cleaner()
    Sentenca_cleaned = [cleaner.clean_text(x) for x in Sentenca]
    vectorizer = CountVectorizer()
    Sentenca_counts = vectorizer.fit_transform(Sentenca_cleaned)
    model.fit(Sentenca_counts, Intencoes)
    print("Enter a command:")
    nova_sentenca = input()
    nova_sentenca_clean = cleaner.clean_text(nova_sentenca)
    #nova_sentenca = "liga a luz"
    counts_da_nova_sentenca = vectorizer.transform(
        [cleaner.clean_text(nova_sentenca_clean)])
    interpretacao_sentenca(counts_da_nova_sentenca, nova_sentenca)
def create_db(self) -> None:
    """Create the database, collect data and insert it."""
    print("creating tables...")
    Base.metadata.create_all(self.engine)
    print("tables created")
    print("uploading data from api...")
    collector = Collector()
    data = collector.collect()
    cleaner = Cleaner()
    data_cleaned = cleaner.cleaner(data)
    print("upload successful")
    print("adding data to tables...")
    installer = Installer()
    installer.install(data_cleaned, self.engine)
    print("database installed successfully")
def infer():
    if request.method == "POST":
        body = json.loads(request.data.decode("utf-8"))
        text = body.get("text", "")
        if text == "":
            return jsonify(result="")
        cleaner = Cleaner()
        text = clean_text(cleaner, text)
        tokenizer = joblib.load(os.path.join(MODELS, "tokenizer.pkl"))
        sequence = tokenizer.texts_to_sequences([text])
        test = pad_sequences(sequence, maxlen=max_len)
        _, model = create_BiLSTMRNN()
        model.load_weights(os.path.join(MODELS, 'BiLSTM.hdf5'))
        return jsonify(
            result=sentiment[
                np.around(model.predict(test), decimals=0).argmax(axis=1)[0]])
    else:
        return jsonify(result="POST API call is required")
def process_html_page(coredb, driver, page, config, locks):
    source = get_clean_source(driver.page_source)
    text_content_dirty = driver.find_element_by_tag_name("body").text
    text_content = Cleaner.clean_all(text_content_dirty)
    text_content_hash = get_source_hash(text_content)

    duplicate_page = coredb.get_page_with_hash(text_content_hash)
    if duplicate_page is not None:
        logging.debug('Duplicate page found with url: ' + duplicate_page['url'])
        # TODO: add column to db to list which page it's a duplicate of
        coredb.update_page(page['id'], PageType.DUPLICATE.value, 200, None,
                           None, duplicate_page['id'])
        return

    if False:  # don't add new links
        links = get_links_from_page(driver, source)
        for url in links:
            handle_new_link(coredb, config, url, page['id'], locks)

    #imgs = driver.find_elements_by_xpath('//img[@src]')
    #img_srcs = set([img.get_attribute('src') for img in imgs])
    #for img_src in img_srcs:
        #if ',' in img_src:
            #continue  # for example svg+xml, ....
        #handle_new_image(coredb, page['id'], img_src)

    coredb.update_page(page['id'], PageType.HTML.value, 200, text_content,
                       text_content_hash)
class DomesticViolenceClassifier:
    def __init__(self):
        self.cleaner = Cleaner()
        self.maxlen = 900
        self.tokenizer = None
        self.parent_path = Path(__file__).parent.parent
        with open(self.parent_path / 'data/neural_network_config/tokenizer.pickle',
                  'rb') as handle:
            self.tokenizer = pickle.load(handle)
        # load json and create model
        json_model_keras = open(
            self.parent_path / 'data/neural_network_config/model.json', 'r')
        loaded_model_json = json_model_keras.read()
        json_model_keras.close()
        self.loaded_model = model_from_json(loaded_model_json)
        # load weights into new model
        self.loaded_model.load_weights(
            self.parent_path / "data/neural_network_config/model.h5")
        # compile the loaded model
        self.loaded_model.compile(optimizer='adam',
                                  loss='mean_squared_error',
                                  metrics=['mae', 'accuracy'])

    # returns the probability that a text string is about domestic violence
    def domestic_violence_subject_probability(self, text: str):
        x = self.tokenizer.texts_to_sequences([self.cleaner.clean_text(text)])
        x = pad_sequences(x, padding='post', maxlen=self.maxlen)
        y_new = self.loaded_model.predict(x)
        return y_new[0][0]
def main():
    # instantiate all the useful classes
    linker = EntityLinker()
    cleaner = Cleaner()
    sentiment = Sentiment()
    utility = Utility()
def process(self):
    maxlen, chars, x, y, text, chars, char_indices, indices_char, next_chars = Cleaner(
    ).mainClean()
    model = self.createModel(maxlen, chars)
    print_callback = LambdaCallback(
        on_epoch_end=self.on_epoch_end(model, self.on_epoch_end(), text,
                                       maxlen, chars, char_indices,
                                       indices_char))
    ModelTrainer().main(print_callback, model, x, y)
def clean_invalid_glyphs_and_remove_hinting(fontfile, hinting, output):
    whitespace_and_ignorable_list = get_whitespace_and_ignorable_list()
    cleaner = Cleaner(fontfile, hinting, whitespace_and_ignorable_list)
    cleaner.clean()
    # Flatten cmap format 4 (no idRangeOffset/glyphIdArray) so it is a simple
    # subset of format 12.
    change_method(_c_m_a_p.cmap_format_4, _cmap_format_4_compile, 'compile')
    cleaner.save(output)
    cleaner.close()
def sqliteCustomer():
    sqlite = Sqlite()
    sqlite.createTable()  # create the table
    cl = Cleaner(sqlite)
    while 1:
        if not gl.QTFLAG:  # check for shutdown
            gl.DCFLAG = False
            break
        try:
            data = gl.MYQ.get()
            if data[0] == 3:
                addImg(sqlite, data)
            elif data[0] == 7:
                print 'clear'
                cl.cleanOTImg()
        except Queue.Empty:
            time.sleep(1)
        except Exception, e:
            logger.error(str(e))
            raise
            gl.TRIGGER.emit("<font %s>%s</font>" % (gl.style_red, getTime() + str(e)))
def clean_invalid_glyphs_and_remove_hinting(fontfile, hinting, output, verbose):
    whitespace_and_ignorable_list = get_whitespace_and_ignorable_list()
    cleaner = Cleaner(fontfile, hinting, whitespace_and_ignorable_list)
    cleaner.clean(verbose)
    # Flatten cmap format 4 (no idRangeOffset/glyphIdArray) so it is a simple
    # subset of format 12.
    # do we still want this?
    change_method(_c_m_a_p.cmap_format_4, _cmap_format_4_compile, "compile")
    old_12_or_13_compile = change_method(_c_m_a_p.cmap_format_12_or_13,
                                         _cmap_format_12_or_13_compile,
                                         "compile")
    cleaner.save(output)
    cleaner.close()
    change_method(_c_m_a_p.cmap_format_12_or_13, old_12_or_13_compile, "compile")
def test_clean(self):
    img = config.data_dir_path + 'test/10.png'
    PIL.Image.open(img).show()
    e = Cleaner('./tests/data/models/13_72000_model.pkl')
    e.clean_and_show(img)
    e = Cleaner('./tests/data/models/ae3_213750_model.pkl')
    e.clean_and_show(img)
    pass
def __init__(self, config={}):
    Cleaner.__init__(self, config)
    self.repo = git.Repo(self.cwd)
    self.remote = getattr(self.repo.remotes, self.remote)  # or remote/whatev
def main(p):
    start = time.time()
    # select the files whose names end with 'json.gz'
    file_name_list = filter(lambda x: x.endswith('json.gz'), os.listdir(p))
    # TODO: add a check that there are 24 files (glob module)
    for file_name in file_name_list:
        with open(os.path.join(p, file_name), 'r') as f:
            raw_json_file = gzip.GzipFile(fileobj=f)

            record_cleaner = Cleaner()
            record_grouper = Grouper(db)
            record_normalizer = Normalizer(db)
            mongo_helper = MongoHelper(db)
            counter = ActorCounter()
            evaluater = Evaluater()

            # data cleaning
            record_cleaner.set_dirty_data(raw_json_file)
            record_cleaner.clean()
            clean_record = record_cleaner.get_clean_data()
            log.log('clean record %s' % len(clean_record))

            # data processing
            # grouping
            record_grouper.set_records(clean_record)
            record_grouper.group()
            record_actor_exist = record_grouper.get_group_1()
            record_actor_new = record_grouper.get_group_2()
            log.log('record_actor_exist: %s' % len(record_actor_exist))
            log.log('record_actor_new: %s' % len(record_actor_new))

            # process the records whose actor already exists
            log.log('Begin processing actor-exist records...')
            # just delete the record's actor_attributes
            for record in record_actor_exist:
                del record['actor_attributes']
            log.log('Finished.')

            # process the records whose actor does not exist yet
            record_normalizer.set_records(record_actor_new)
            record_normalizer.normalize()
            record_actor_new = record_normalizer.get_record_actor_new()
            new_actors = record_normalizer.get_new_actors()

            # push today's locally added actors to the database
            actors = new_actors.values()
            mongo_helper.insert_new_actors(actors)
            # for the new actors, update the corresponding counters in Redis
            counter.count_actor_list(actors)

            # compute the val of each record
            evaluater.set_records(record_actor_exist)
            evaluater.evaluate()
            val_actor_exist = evaluater.get_val_cache()

            evaluater.set_records(record_actor_new)
            evaluater.evaluate()
            val_actor_new = evaluater.get_val_cache()

            # insert the records into the database
            mongo_helper.insert_new_reocrds(record_actor_new)
            mongo_helper.insert_new_reocrds(record_actor_exist)

            # update today's added val per user in the database
            mongo_helper.update_val(val_actor_new)
            mongo_helper.update_val(val_actor_exist)

            record_cleaner.free_mem()
            del record_cleaner
            del record_grouper
            del record_normalizer
            del mongo_helper
            del counter
            del evaluater

    # generate the CSV file
    util.grcount2csv()

    end = time.time()
    log.log('total: %s s' % (end - start))
from cleaner import Cleaner

cleaner = Cleaner()
cleaner.run()
from feeder import FeedDownloader
from webhelper import WebHelper
from cleaner import Cleaner
from threading import Thread
import time

t1 = time.time()
fd = FeedDownloader("http://mybroadband.co.za/news/feed", "My Broadband")
wh = WebHelper()
cleaner = Cleaner()
articles = fd.parse()
print time.time() - t1, "setup complete"

t1 = time.time()
wh.get_html_threaded(articles)
print time.time() - t1, "threaded download complete"

t1 = time.time()
for a in articles:
    a.html = wh.attempt_get_html(a.url)
print time.time() - t1, "non threaded download complete"

t1 = time.time()
for a in articles:
    if a.html:
        a.plaintext = cleaner.clean(a.html)
def clean(self, base, file_base):
    cleaner = Cleaner(logger=self.logger, options=self.options)
    cleaner.handle_aux(base, file_base)
def main():
    parser = OptionParser(prog="reaper", version="0.1.0",
                          usage="%prog [options] <path to folder> " +
                                "[<path to folder...>]",
                          description="PyReaper is a small tool that detects " +
                                      "duplicated files by hashing them and then deletes " +
                                      "these duplicated files leaving just one of them",
                          epilog="CAUTION: handle with EXTREME CARE, " +
                                 "use -n option first if you are not sure of " +
                                 "what you are doing, this thing deletes stuff!!!")
    parser.add_option("-n", "--no-action", dest="noaction", action="store_true",
                      help="does not execute any file action")
    parser.add_option("-d", "--delete", dest="delete", action="store_true",
                      help="delete every duplicated file")
    parser.add_option("-m", "--move-to", dest="moveto", metavar="DIR",
                      help='Moves duplicated files instead of deleting them')
    parser.add_option("-p", "--print-rm-commands", dest="rmcommands", action="store_true",
                      help="skips the delete process and prints a set of \"rm\" " +
                           "commands so you can delete the duplicate files yourself")
    parser.add_option("-i", "--interactive", dest="interactive", action="store_true",
                      help="interactive mode, will ask for each duplicate. " +
                           "By default it deletes every duplicate found but " +
                           "the first one")
    parser.add_option("-y", "--dont-ask-confirmation", dest="noconfirmation", action="store_true",
                      help="skips the confirmation question")
    parser.add_option("-s", "--store-hashes", dest="storehash", action="store_true",
                      help="store and keep calculated hashes in .digest hidden files")
    parser.add_option("-t", "--delete-empty-trees", dest="deletedirs", action="store_true",
                      help="deletes empty trees when it finishes")
    parser.add_option("-e", "--ext", dest="extension", action="store",
                      help="only digests files with the given extension")
    parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
                      help="outputs much more information during the process " +
                           "(sometimes even too much)")
    parser.add_option("", "--ignore-stored-hashes", dest="ignorehashes", action="store_true",
                      help="ignores stored calculated hashes in .digest " +
                           "hidden files, this means every hash will be " +
                           "recalculated")

    (options, args) = parser.parse_args()
    if not args:
        exit_with_error('', parser)

    br = Walker(options.extension,
                options.storehash,
                options.verbose,
                options.ignorehashes)

    action = None
    moveto = None
    rmcommands = False
    if options.noaction:
        action = 'n'
    elif options.moveto:
        action = 'm'
        moveto = options.moveto
        if not moveto:
            exit_with_error('No "move to" target provided', parser)
        elif not os.path.exists(moveto):
            exit_with_error('Path %s does not exist' % moveto, parser)
        elif not os.path.isdir(moveto):
            exit_with_error('Path %s is not a directory' % moveto, parser)
    elif options.delete:
        action = 'd'
        rmcommands = options.rmcommands
    if action is None:
        exit_with_error('No action selected', parser)

    for path in args:
        if not os.path.exists(path):
            exit_with_error("path {0} does not exist".format(path), parser)
        br.digest(path)

    duplicates = br.collisions()
    clean = False
    if duplicates:
        print "Duplicates found, cleaning..."
        c = Cleaner(duplicates,
                    options.interactive,
                    options.verbose,
                    action,
                    rmcommands,
                    options.noconfirmation,
                    moveto)
        clean = c.clean()
    else:
        print "No duplicates found"

    if not options.storehash:
        print "Deleting digest files..."
        c = Cleaner(verbose=options.verbose)
        c.delete(br.digestFiles(), -1, True)

    if options.deletedirs:
        c = Cleaner(verbose=options.verbose)
        for path in args:
            empty_dirs = br.findEmptyDirs(path)
            for dir in empty_dirs:
                if options.rmcommands or options.noaction:
                    print "Keeping empty tree {0}".format(dir)
                else:
                    c.deleteDir(dir)

    if clean:
        sys.exit(0)
    else:
        sys.exit(1)
from os import environ as environment
import argparse, yaml
import logging
from cleaner import Cleaner

parser = argparse.ArgumentParser()
parser.add_argument("--path", help="path to run cleaner on", type=str)
args = parser.parse_args()

# logging.basicConfig(level=logging.DEBUG)

with open("config.yml") as sets:
    config = yaml.load(sets)

path = args.path
if not path:
    path = config["cleaner"]["general_pattern"]

cleaner = Cleaner(config["cleaner"])

print "Cleaning path: " + str(path)
cleaner.clean(path, True)
def clean_invalid_glyphs_and_remove_hinting(fontfile, hinting, output):
    whitespace_list = get_whitespace_list()
    cleaner = Cleaner(fontfile, hinting, whitespace_list)
    cleaner.clean()
    cleaner.save(output)
    cleaner.close()