class FinderTests(TestCase):
    """Unit tests for Finder; tearDown logs each test's wall-clock time."""

    def setUp(self):
        """Build a Finder over 10000 random 10-character lowercase strings.

        Keeps the first generated string so the success test can search
        for its reverse, and records the start time so tearDown can
        report how long each test took.
        """
        per_str_chars = 10
        array_size = 10000
        string_list = [
            ''.join(random.choice(string.ascii_lowercase)
                    for _ in range(per_str_chars))
            for _ in range(array_size)
        ]
        self.first_array_str = string_list[0]
        self.finder = Finder(string_list)
        self.start_time = time.time()

    def test_success(self):
        """Searching the reverse of a known string returns that string."""
        time.sleep(1)  # deliberate delay so tearDown's timing is visible
        first_array_reverse_str = self.first_array_str[::-1]
        self.assertEqual([self.first_array_str],
                         self.finder.find(first_array_reverse_str))

    def test_failure(self):
        """Searching an invalid string returns None."""
        time.sleep(2)  # deliberate delay so tearDown's timing is visible
        # assertIsNone is the idiomatic check (was assertEqual(None, ...)).
        self.assertIsNone(self.finder.find('Invalid'))

    def tearDown(self):
        """Log the time taken by each test."""
        t = time.time() - self.start_time
        print("%s: %.3f" % (self.id(), t))
def main():
    """Read two words from argv and report whether word2 matches word1.

    argv[1]/argv[2] are the two words; argv[3] is passed through to the
    Finder constructor along with the buffer size (len(word1) + 1).
    """
    word1 = sys.argv[1]
    word2 = sys.argv[2]
    size = len(word1) + 1  # include the new line
    finder = Finder(size, sys.argv[3])
    # flip it since we backtrack
    if finder.find(word2, word1):
        # Parenthesized print is valid on both Python 2 and Python 3;
        # the original bare `print word2` statement is a SyntaxError on 3.
        print(word2)
    else:
        print("no match")
# NOTE(review): this chunk begins inside an arg_parser.add_argument(...)
# call whose opening line is above the visible region.
                        required=False, default=False, action='store_true')
arg_parser.add_argument('-s', '--success',
                        required=False, default=False, action='store_true')
args = arg_parser.parse_args()

# Map the CLI flags onto the Finder's keyword settings.
settings = {
    'save_json': args.json,
    'save_csv': args.csv,
    'print_success': args.success,
}
fndr = Finder(**settings)

# Only run searches when connectivity is confirmed.
if fndr.have_internet():
    fndr.find(args.name)
    # NOTE(review): nesting of the universal search under the internet
    # check is reconstructed from collapsed source -- confirm intent.
    if args.universal:
        # Register every site URL listed in sites.txt, then search them all.
        with open('sites.txt') as sites_file:
            for url in sites_file:
                fndr.register_universal(url.strip())
        fndr.find_universal(args.name)
def test_finder_empty(self):
    """A Finder over an empty list finds nothing: find() returns None."""
    finder = Finder([])
    # `is None` is the idiomatic identity check (was `== None`).
    assert finder.find("ftdjr") is None
def test_finder_single(self):
    """With one matching candidate, find() returns it as a bare string."""
    candidates = [None, "dsad", None, "rweew", "rtdfj"]
    assert Finder(candidates).find("ftdjr") == "rtdfj"
def test_finder_multiple(self):
    """With several matches, find() returns them all as a list."""
    candidates = [None, "dsa", None, "sad"]
    result = Finder(candidates).find("sad")
    assert result == ["dsa", "sad"]
async def message(msg: types.Message):
    """Search avito.ru for the user's text and reply with the result."""
    query = msg.text
    scraper = Finder(
        'avito.ru',
        query,
        'https://www.avito.ru/rossiya?q=',
        'price-text-1HrJ_',
        'span',
    )
    await bot.send_message(msg.from_user.id, scraper.find())
# NOTE(review): this chunk begins inside an if/elif chain choosing the run
# configuration; the opening `if` is above the visible region.
    config = Config.get_default_config(args)
elif args.imbalanced:
    config = Config.get_imbalanced_off_by_one_config(args)
else:
    config = Config.get_balanced_off_by_one_config(args)

# Initialise experiment tracking for this thesis project.
wandb.init(config=config, project="msc_thesis_hendrig")
#config.update_values(wandb.config)

# Pin the process to the GPU reporting the lowest memory usage.
os.environ["CUDA_VISIBLE_DEVICES"] = str(GPUSelector(config).pick_gpu_lowest_memory())

model = Model(config)
print('Created model')

# Each stage below runs only when its config value / CLI flag is set.
if config.TRAIN_PATH:
    model.train()
if config.TEST_PATH and not args.data_path:
    evaluator = Evaluator(config, model)
    evaluator.evaluate()
if args.predict:
    predictor = InteractivePredictor(config, model)
    predictor.predict()
if args.find_bugs and args.find_data_path:
    finder = Finder(config, model)
    finder.find()
if args.release and args.load_path:
    model.evaluate(release=True)
if args.api_endpoint:
    api_endpoint = APIEndpoint(config, model)
    api_endpoint.run()
model.close_session()
from finder import Finder

# Word2vec-backed semantic finder; the second argument presumably selects
# binary-format loading of the vectors file -- TODO confirm against
# Finder's constructor.
finder = Finder('GoogleNews-vectors-negative300.bin', True)

from tika import parser

# Extract the book's raw text and split it into sentences on '.'.
raw = parser.from_file('harry6.pdf')
content = raw['content']
lines = content.split('\n')
content = content.replace('\n', ' ')
harry_potter_6 = content.split('.')

# Query a few phrases against the sentence list and print each hit.
ret = (finder.find('mansion ruined', harry_potter_6))
print_info(ret)
ret = (finder.find('magic killed', harry_potter_6))
print_info(ret)
ret = (finder.find('drink poison', harry_potter_6))
print_info(ret)

# NOTE(review): st() looks like a debugger breakpoint alias (e.g.
# pdb.set_trace) -- confirm before shipping.
st()

if __name__ == '__main__':
    main()
def do_GET(self):
    """Serve three routes: /? search queries, static HTML, and .docx export.

    NOTE(review): internal newlines of the triple-quoted literals were
    lost in the collapsed source and are reconstructed here -- confirm
    against the original file.
    """
    self._set_headers()
    if self.path.startswith('/?'):
        # Form submission: decode the query string into search entries.
        path = parse.urlparse(self.path[2:]).path
        path = parse.unquote_plus(path)
        entries = {}
        package = False
        for entry in path.split('&'):
            a = entry.split('=')
            if a[0] == 'extras[]':
                # The 'package' extra requests a zip of the matched docs.
                if 'package' in a[1]:
                    package = True
                continue
            # Decimal values become ints; everything else stays a string.
            entries[a[0]] = a[1] if not a[1].isdecimal() else int(a[1])
        conditions = Condition.create_conditions(entries)
        # Log the parsed conditions under a timestamped filename.
        timestamp = datetime.now().strftime('%y%m%d_%H%M%S')
        log = open(timestamp + '.txt', 'w')
        log.write(pprint.pformat(conditions) + '\n')
        log.close()
        documents = Finder.find(
            Finder.get_collection('localhost', 27017, 'shoulie', 'resumes'),
            conditions)
        if package:
            Finder.package(documents, WebSvr.base_folder, timestamp + '.zip')
        message = Reporter.to_html(documents, '')
        """
        html = open(timestamp + '.html', 'w')
        html.write(message)
        html.close()
        """
        self.wfile.write(bytes(message, 'utf8'))
    elif self.path == '/' or self.path.endswith('.html'):
        # Static page: empty path serves the search form; otherwise the
        # file is looked up under base_folder/<prefix>/<name>.
        path = parse.unquote(self.path.lstrip('/'))
        if not path:
            path = 'form.html'
        else:
            path = os.path.join(WebSvr.base_folder, path.split('_')[0], path)
        try:
            f = open(path)
        except FileNotFoundError:
            self.send_error(404, 'File Not Found: ' + path)
            return
        self.wfile.write(bytes(f.read(), 'utf8'))
    elif self.path.endswith('.docx'):
        # Export: look the resume up by its .html file key, write it to
        # a .docx, and return a confirmation page.
        path = parse.unquote(self.path.lstrip('/'))
        basic = os.path.splitext(path)[0]
        conditions = Condition.create_conditions({'file': basic + '.html'})
        documents = Finder.find(
            Finder.get_collection('localhost', 27017, 'shoulie', 'resumes'),
            conditions)
        """
        txt = open(basic + '.txt', 'w')
        txt.write(pprint.pformat(documents[0]) + '\n')
        txt.close()
        """
        Saver.to_doc(documents[0], path)
        message = '''<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<style type="text/css">td, th {{ border: 1px solid black; }}</style>
</head>
<body>
File "{}" generated
</body>
</html>
'''
        self.wfile.write(bytes(message.format(path), "utf8"))
# NOTE(review): this chunk begins mid-way through an argparse setup;
# `argp` is defined above the visible region.
argp.add_argument("-i", "--index", required=True,
                  help="Path to where the computed index will be stored")
argp.add_argument("-q", "--query", required=True,
                  help="Path to the query image")
argp.add_argument("-r", "--result-path", required=True,
                  help="Path to the result path")
args = vars(argp.parse_args())

# initializing with same number of color histogram bars
colorDef = ColorDefine((8, 12, 3))

# Describe the query image and search the precomputed index with it.
query = cv2.imread(args["query"])
features = colorDef.define(query)
finder = Finder(args["index"])
results = finder.find(features)

cv2.imshow("Query", query)
for (distance, fileName) in results:
    # load each result image and display it, waiting for a key press
    result = cv2.imread(args["result_path"] + "/" + fileName)
    cv2.imshow("Result", result)
    cv2.waitKey()
def test_string_exists_in_string_list(self):
    """Query 'asd' matches both 'asd' and 'asdd' from the list."""
    words = ["asd", "asdd", "fre", "glk", "lkm"]
    matches = Finder(string_list=words).find("asd")
    self.assertEqual(["asd", "asdd"], matches)
def test_input_string_duplicate_char(self):
    """A query with repeated characters still matches the right entries."""
    words = ["pineapple", "apple", "aple", "app"]
    matches = Finder(string_list=words).find("apple")
    self.assertEqual(["pineapple", "apple", "aple"], matches)
def test_string_list_is_empty(self):
    """Searching an empty Finder yields an empty result list."""
    empty_finder = Finder(string_list=[])
    self.assertEqual([], empty_finder.find("asd"))
class Retriever:
    """Python 2 query retriever.

    Parses a (possibly weighted) query, looks each term up via Finder,
    accumulates scores from postings.txt, and prints the top results.
    """

    def __init__(self, query):
        # Stop words are loaded once from a UTF-8 file, one per token.
        self.stop_words = map(
            lambda x: unicode(x),
            codecs.open('stop_words.txt', 'r', 'utf-8').read().split())
        self.finder = Finder()
        self.query = query
        self.tokens = self.parse()

    # removes spaces, punctuation from query
    def strip(self, text):
        text = re.sub(ur'[\W_0-9]+', u'', text, flags=re.UNICODE)
        return unicode(text.lower())

    # removes all the stop words from the list of words
    def remove_stop_words(self, words):
        return [w for w in words if not w in self.stop_words]

    # cleans query by stripping punctuations and stop words
    def parse_query(self, query):
        text = map(lambda x: self.strip(x), query)
        return (self.remove_stop_words(text))

    # parses input query, checks if weights are given in input
    # weights are assumed if -w option is provided
    # if not, then all terms in query are given 1 weight
    def parse(self):
        temp = {}
        if self.query[0] == '-w':
            # Weighted form: alternating (term, weight) pairs follow -w.
            l = self.query[1:]
            for tup in [(l[i], l[i + 1]) for i in range(0, len(l), 2)]:
                temp[tup[0]] = float(tup[1])
        else:
            for token in self.query:
                temp[token] = 1
        # Drop terms removed by cleaning (stop words / punctuation-only).
        # Python 2's keys() returns a list, so popping while iterating
        # this snapshot is safe.
        tokens = self.parse_query(temp.keys())
        for key in temp.keys():
            if key not in tokens:
                temp.pop(key, None)
        return temp

    # driver function calculates scores and prints the results
    def run(self):
        print(self.query)
        # NOTE(review): this parse() result is discarded; self.tokens
        # from __init__ is what is actually used below -- confirm the
        # call is intentional.
        self.parse()
        results = {}
        for tup in self.tokens.items():
            token, weight = tup
            number_of_entries, postings_index = self.finder.find(
                unicode(token))
            # Accumulate weighted scores over each consecutive posting
            # line ("doc_id,score") for this token.
            for i in range(number_of_entries):
                line = linecache.getline('postings.txt', postings_index)
                doc_id, score = line.split(",")
                if doc_id in results.keys():
                    results[doc_id] += float(score) * weight
                else:
                    results[doc_id] = float(score) * weight
                postings_index += 1
        # Print up to 10 documents with positive scores, best first.
        sorted_results = sorted(results.items(),
                                key=operator.itemgetter(1),
                                reverse=True)
        for tup in islice(filter(lambda t: t[1] > 0.0, sorted_results), 10):
            print('{}.html {}'.format(tup[0], round(tup[1], 3)))