def main(): timer = timers.Timer() with timers.timing("Parsing", True): task = pddl_parser.open(task_filename=options.task, domain_filename=options.domain) with timers.timing("Normalizing task"): normalize.normalize(task) if options.generate_relaxed_task: # Remove delete effects. for action in task.actions: for index, effect in reversed(list(enumerate(action.effects))): if effect.literal.negated: del action.effects[index] sas_task = pddl_to_sas(task) dump_statistics(sas_task) # Print pddl if a transormation option is selected. if options.exp or options.evmdd: pddl_parser.print_pddl(options.domain, sas_task, task, []) print("done!") exit(0) with timers.timing("Writing output"): with open("output.sas", "w") as output_file: sas_task.output(output_file) print("Done! %s" % timer)
def main():
    args = parse_args()
    timer = timers.Timer()
    with timers.timing("Parsing", True):
        task = pddl.open(task_filename=args.task, domain_filename=args.domain)
    with timers.timing("Normalizing task"):
        normalize.normalize(task)
    if args.generate_relaxed_task:
        # Remove delete effects.
        for action in task.actions:
            for index, effect in reversed(list(enumerate(action.effects))):
                if effect.literal.negated:
                    del action.effects[index]
    sas_task = pddl_to_sas(task)
    dump_statistics(sas_task)
    if sas_task is not None:
        with timers.timing("Writing output"):
            with open("output.sas", "w") as output_file:
                sas_task.output(output_file)
    print("Done! %s" % timer)
def build_titles(title):
    normalized_title = normalize(title).lower()
    titles = [title, normalized_title]
    if title.find(' & ') != -1:
        t = title.replace(" & ", " and ")
        titles.append(t)
        titles.append(normalize(t))
    t2 = []
    for t in titles:
        if t.lower().startswith('the '):
            t2.append(t[4:])
        elif t.lower().startswith('a '):
            t2.append(t[2:])
    titles += t2
    if re_amazon_title_paren.match(title):
        t2 = []
        for t in titles:
            m = re_amazon_title_paren.match(t)
            if m:
                t2.append(m.group(1))
                t2.append(normalize(m.group(1)))
        titles += t2
    return {
        'full_title': title,
        'normalized_title': normalized_title,
        'titles': titles,
        'short_title': normalized_title[:25],
    }
def index(**kwargs):
    if request.args.get('submit') is not None:
        active = request.form.get('tabStatus')
        params = ['siteDest', 'siteSource']
        if active == 'page':
            params.append('title')
        return redirect(url_for('.index', **get_params(params)), code=c.REQUEST)
    normalize(['title'], kwargs)
    if not request.form.get('tabStatus', False):
        if kwargs.get('siteDest', False) and not kwargs.get('title', False):
            kwargs['tabStatus'] = 'content'
        else:
            kwargs['tabStatus'] = 'page'
    if not request.form.get('siteDest', False) and not request.form.get('siteSource', False):
        kwargs['siteDest'] = 'th'
        kwargs['siteSource'] = 'en'
    form = wikitranslator.form.getForm()(request.form, **kwargs)
    data = wikitranslator.model.Model(form=form)
    if form.validate(data):
        data.render()
    return render('index.html', tool=__name__, form=form, data=data)
def main(): print("-------------POND Translator-----------") args = parse_args() timer = timers.Timer() with timers.timing("Parsing", True): task = pddl.open(task_filename=args.task, domain_filename=args.domain) print(); print("Problem Filename = " + args.task); print("Domain Filename = " + args.domain); print(); with timers.timing("Normalizing task"): normalize.normalize(task) if args.generate_relaxed_task: # Remove delete effects. for action in task.actions: for index, effect in reversed(list(enumerate(action.effects))): if effect.literal.negated: del action.effects[index] sas_task = pddl_to_sas(task) dump_statistics(sas_task) if not sas_task is None: with timers.timing("Writing output"): with open("..\\webapps\\LunaPlanner\\translator_output\\output.sas", "w") as output_file: sas_task.output(output_file) print() print("SAS file saved at: " + output_file.name) print("Done! %s" % timer)
def main(): args = parse_args() timer = timers.Timer() with timers.timing("Parsing", True): task = pddl.open(task_filename=args.task, domain_filename=args.domain, addl_filename=args.addl) with timers.timing("Normalizing task"): normalize.normalize(task) if args.generate_relaxed_task: # Remove delete effects. for action in task.actions: for index, effect in reversed(list(enumerate(action.effects))): if effect.literal.negated: del action.effects[index] output_file = args.output_file use_proto = args.use_proto print('Use Proto:', use_proto) sas_task = pddl_to_sas(task, args.agent_id, args.agent_url) dump_statistics(sas_task) with timers.timing("Writing output"): with open(output_file, "w") as output_file: if use_proto: sas_task.output_proto(output_file) else: sas_task.output(output_file) print("Done! %s" % timer)
def marc_title(amazon_first_parts, marc_first_parts):
    # print 'title found: ', marc_first_parts[-1]
    if normalize(marc_first_parts[-1]) not in titles:
        return False
    if compare_parts(marc_first_parts[:-1], amazon_first_parts):
        if verbose:
            print("match with MARC end title")
        return True
    if normalize(amazon_first_parts[0]) in titles:
        if compare_parts(marc_first_parts[:-1], amazon_first_parts[1:]):
            if verbose:
                print("match, both with titles")
            return True
        if match_seq(marc_first_parts[:-1], amazon_first_parts[1:]):
            if verbose:
                print("partial match, both with titles")
            return True
    if match_seq(marc_first_parts[:-1], amazon_first_parts):
        if verbose:
            print("partial match with MARC end title")
        return True
    if match_seq(marc_first_parts, amazon_first_parts):
        if verbose:
            print("partial match with MARC end title")
    return False
def main():
    options, args = parse_options()
    check_python_version(options.force_old_python)
    timer = timers.Timer()
    with timers.timing("Parsing", True):
        task = pddl.open()
    with timers.timing("Normalizing task"):
        normalize.normalize(task)
    if options.generate_relaxed_task:
        # Remove delete effects.
        for action in task.actions:
            for index, effect in reversed(list(enumerate(action.effects))):
                if effect.literal.negated:
                    del action.effects[index]
    sas_task = pddl_to_sas(task)
    dump_statistics(sas_task)
    with timers.timing("Writing output"):
        with open("output.sas", "w") as output_file:
            sas_task.output(output_file)
    print("Done! %s" % timer)
def main(): timer = timers.Timer() with timers.timing("Parsing", True): task = pddl_parser.open( domain_filename=options.domain, task_filename=options.task) with timers.timing("Normalizing task"): normalize.normalize(task) if options.generate_relaxed_task: # Remove delete effects. for action in task.actions: for index, effect in reversed(list(enumerate(action.effects))): if effect.literal.negated: del action.effects[index] sas_task = pddl_to_sas(task) dump_statistics(sas_task) with timers.timing("Writing output"): with open("output.sas", "w") as output_file: sas_task.output(output_file) print("Done! %s" % timer) global t1, t2 t2 = time.time() - t2 print('Time1:', t1) print('Time2:', t2)
def compare_author_fields(e1_authors, e2_authors):
    for i in e1_authors:
        for j in e2_authors:
            if normalize(i['db_name']) == normalize(j['db_name']):
                return True
            if normalize(i['name']).strip('.') == normalize(j['name']).strip('.'):
                return True
    return False
def translate(task):
    normalize.normalize(task)
    prog = PrologProgram()
    translate_facts(prog, task)
    for conditions, effect in normalize.build_exploration_rules(task):
        prog.add_rule(Rule(conditions, effect))
    prog.normalize()
    prog.split_rules()
    return prog
def normalize(self): """ Performs normalization. At this level, we do those normalizations that needs both the pre & post syscall objects """ import normalize for id in self: pre,post = self.getSyscallByID(id) normalize.normalize(pre, post)
def flip_marc_name(marc):
    m = re_marc_name.match(marc)
    if not m:
        return remove_trailing_dot(marc)
    first_parts = split_parts(m.group(2))
    if normalize(first_parts[-1]) not in titles:
        # example: Eccles, David Eccles Viscount
        return remove_trailing_dot(m.group(2)) + ' ' + m.group(1)
    if len(first_parts) > 2 and normalize(first_parts[-2]) == normalize(m.group(1)):
        return u' '.join(first_parts[0:-1])
    return u' '.join(first_parts[:-1] + [m.group(1)])
def __init__(self, item_id, quantity, *options):
    """Store the descriptors of an order item in this object.

    Arguments:
    item_id -- the restaurant's numerical ID for the item
    quantity -- the quantity
    options -- any number of options to apply to the item
    """
    self.item_id = normalize(item_id, 'number')
    self.quantity = normalize(quantity, 'number')
    self.options = [normalize(option, 'number') for option in options]
def get_delivery_check(self, restaurant_id, date_time, address):
    """Get data about a given restaurant, including whether it will deliver
    to the specified address at the specified time.

    Arguments:
    restaurant_id -- Ordr.in's restaurant identifier
    date_time -- either 'ASAP' or a datetime object in the future
    address -- the address to deliver to. Should be an ordrin.data.Address object
    """
    dt = normalize(date_time, 'datetime')
    restaurant_id = normalize(restaurant_id, 'number')
    return self._call_api('GET', ('dc', restaurant_id, dt, address.zip,
                                  address.city, address.addr))
def index(**kwargs): if request.args.get("submit") is not None: return redirect(url_for(".index", **get_params(["title", "oldid"])), code=c.REQUEST) normalize(["title"], kwargs) form = dykchecker.form.getForm()(request.form, **kwargs) data = dykchecker.model.Model(form=form) if form.validate(data): data.render() return render("page.html", tool=__name__, form=form, data=data) else: return render("index.html", tool=__name__, form=form)
def translate(task): with timers.timing("Normalizing task"): normalize.normalize(task) with timers.timing("Generating Datalog program"): prog = PrologProgram() translate_facts(prog, task) for conditions, effect in normalize.build_exploration_rules(task): prog.add_rule(Rule(conditions, effect)) with timers.timing("Normalizing Datalog program", block=True): # Using block=True because normalization can output some messages # in rare cases. prog.normalize() prog.split_rules() return prog
def update(self, login, first_name, last_name):
    """Updates account for the user associated with login.
    Throws a relevant exception on failure.

    Arguments:
    login -- the user's login information. Should be an ordrin.data.UserLogin object
    first_name -- the user's first name
    last_name -- the user's last name
    """
    data = {'email': login.email,
            'first_name': normalize(first_name, 'name'),
            'last_name': normalize(last_name, 'name'),
            'pw': login.password}
    return self._call_api('POST', ('u', login.email), login=login, data=data)
def set_credit_card(self, login, card_nick, credit_card):
    """Save a credit card by nickname for the logged-in user.
    Throws a relevant exception on failure.

    Arguments:
    login -- the user's login information. Should be an ordrin.data.UserLogin object
    card_nick -- the nickname of the credit card to save
    credit_card -- the credit card to save. Should be an ordrin.data.CreditCard object
    """
    card_nick = normalize(card_nick, 'nick')
    data = credit_card.make_dict()
    data.update(login.make_dict())
    data['nick'] = card_nick
    data['phone'] = normalize(credit_card.phone, 'phone')
    return self._call_api('PUT', ('u', login.email, 'ccs', card_nick),
                          login=login, data=data)
def do_wicked(self, newcontent, title, section):
    try:
        wicked = getWicked(self.context.getField('text'), self.context)
        wicked.section = section
        wicked.manageLink(newcontent, normalize(title))
    except ComponentLookupError:
        pass
def run(parser, args):
    ## TODO - just change to 1 argument: --protocol -- with options [1,2,3,4]
    if args.protocol1:
        protocol = 1
    elif args.protocol2:
        protocol = 2
    elif args.protocol3:
        protocol = 3
    elif args.protocol4:
        protocol = 4
    elif args.protocol5:
        protocol = 5
    elif args.protocol6:
        protocol = 6
    late = normalize(latestage=args.latestage, protocol=protocol,
                     earlystage=args.earlystage, pseudo=args.pseudo,
                     bandwidth=args.bandwidth, quiet=args.quiet)
    if args.regions:
        # find maximums (summits) within regions given
        regions = BedTool(args.regions)
    else:
        # find peak regions by algorithm at top, then summits within them
        ## Read in states bedGraph, identify peaks
        ## states = CovBed(args.states)
        regions = find_candidate_regions(args.states, thresh_state=1,
                                         merge1=10e3, minwidth=50e3,
                                         merge2=40e3, max_state_thresh=2,
                                         internal=0.8)
    # Convert CovBed object to BedTool object
    a = BedTool(StringIO.StringIO(late.get_bdg(bdg=late.count, collapsed=True)))
    ans = summits(a=a, b=regions)
    print str(ans).strip()
def _call_api(self, method, arguments, login=None, data=None):
    """Calls the api at the saved url and returns the return value as Python
    data structures. Rethrows any api error as a Python exception."""
    method = normalize(method, 'method')
    uri = '/' + ('/'.join(urllib.quote_plus(str(arg)) for arg in arguments))
    full_url = self.base_url + uri
    headers = {}
    if self.key:
        headers['X-NAAMA-CLIENT-AUTHENTICATION'] = 'id="{}", version="1"'.format(self.key)
    if login:
        hash_code = sha256(''.join((login.password, login.email, uri))).hexdigest()
        headers['X-NAAMA-AUTHENTICATION'] = 'username="******", response="{}", version="1"'.format(login.email, hash_code)
    try:
        r = self._methods[method](full_url, data=data, headers=headers)
    except KeyError:
        raise error.request_method(method)
    r.raise_for_status()
    try:
        result = json.loads(r.text)
    except ValueError:
        raise ApiInvalidResponseError(r.text)
    if '_error' in result and result['_error']:
        if 'text' in result:
            raise errors.ApiError((result['msg'], result['text']))
        else:
            raise errors.ApiError(result['msg'])
    return result
def experiment1(datasets, numClusters):
    ###############---VECTOR CONFIGURATION---################
    # Configure data, resulting in a list of dictionaries (labels-->vectors).
    # There is a dictionary for each dataset, stored in the same order as in
    # the datasets list.
    # dataDictionaries = randomlyConfigureActiveColumns(datasets, 5, True)
    # OR:
    dataDictionaries = explicitlyConfigureActiveColumns(datasets, [0, 1, 2, 3], True)

    ###############---VECTOR NORMALIZATION---################
    # At this point, we have a list of dictionaries. Each dictionary contains
    # labels mapping to vectors. All of the vectors have the same
    # dimensionality, built in the way that we specified for configuration.
    normalizedDictionaries = []
    for d in dataDictionaries:
        # print d, "\n"
        normalizedDictionaries.append(normalize.normalize(d))
        # THERE ARE ALSO OTHER WAYS TO NORMALIZE

    ###################---CLUSTERING---#####################
    clusters = cluster.gonzalez(crunchDictionaryList(normalizedDictionaries),
                                numClusters, distance.euclidean)

    ##################---STORE RESULTS---####################
    # Prepare to write experiment file
    clusteringAlgorithmInfo = "gonzalez"
    distanceMeasurementInfo = "euclidean"
    vectorConfigurationInfo = ("explicitly configured, same columns used "
                               "across datasets, Indices used: [0,1,2,3]")
    writeFile(1, numClusters, clusteringAlgorithmInfo, distanceMeasurementInfo,
              vectorConfigurationInfo, clusters)
def compare_title(amazon, marc):
    amazon_title = amazon['normalized_title'].lower()
    marc_title = normalize(marc['full_title']).lower()
    short = False
    if len(amazon_title) < 9 or len(marc_title) < 9:
        short = True
    if not short:
        for a in amazon['titles']:
            for m in marc['titles']:
                if a == m:
                    return ('full-title', 'exact match', 600)
        for a in amazon['titles']:
            for m in marc['titles']:
                if substr_match(a, m):
                    return ('full-title', 'contained within other title', 350)
    max_score = 0
    for a in amazon['titles']:
        for m in marc['titles']:
            percent, ordered = keyword_match(a, m)
            score = percent * 450
            if ordered:
                score += 50
            if score and score > max_score:
                max_score = score
    if max_score:
        return ('full-title', 'keyword match', max_score)
    elif short:
        return ('full-title', 'shorter than 9 characters', 0)
    else:
        return ('full-title', 'mismatch', -600)
def load_data(tup):  # filename, coordinates, labels tuple
    size = P.INPUT_SIZE
    data = []
    labels = []
    images = dataset_3D.giveSubImage(tup[0], tup[1], size)
    labels += map(int, tup[2])
    data += images[:]
    data = normalize.normalize(np.array(data, dtype=np.float32))
    if P.ZERO_CENTER:
        data -= P.MEAN_PIXEL
    result = zip([tup[0]] * len(labels),
                 np.array(data, dtype=np.float32),
                 np.array(labels, dtype=np.int32))
    if P.AUGMENT and P.AUGMENTATION_PARAMS['flip']:
        augmentation_extra = []
        for filename, image, label in result:
            if label == 1:
                flipped_images = augment.get_all_flips_3d(image)
                np.random.shuffle(flipped_images)
                flipped_images = flipped_images[:1]  # SELECT 1 RANDOM IMAGE OF 7 possible flips
                n_new = len(flipped_images)
                augmentation_extra += zip([filename] * n_new, flipped_images, [label] * n_new)
            else:
                # For false candidates take one flip combination at random :)
                flip_option = augment.OPTS[np.random.randint(8)]
                augment.flip_given_axes(image, flip_option)
        result += augmentation_extra
    return result
def make_bigrams(text):
    # tokens = WordPunctTokenizer().tokenize(text)
    tokens = normalize(text).split(' ')
    tokens = [t for t in tokens if len(t)]
    # tokens = filter(lambda t: len(t), map(normalize, tokens))
    tokens = [t for t in tokens if t not in STOPWORDS]
    bigrams = list(tokens_to_bigrams(tokens))
    return bigrams
def cropAndResize(FILENAME, newSize):
    img = Image.open(FILENAME).convert('RGB')
    arr = np.array(np.asarray(img).astype('float'))
    upperLeft, bottomRight = radius.get_corners(radius.get_radius(img, arr), img.size)
    # crop array to the space we want
    arr = arr[upperLeft[1]:bottomRight[1], upperLeft[0]:bottomRight[0]]
    # make new image
    new_img = Image.fromarray(arr.astype('uint8'), 'RGB')
    new_img = new_img.resize((newSize, newSize))
    # get the image we just saved into an array, normalize it, then make it again
    arr = np.array(np.asarray(new_img).astype('float'))
    normalize(arr)
    new_img_norm = Image.fromarray(arr.astype('uint8'), 'RGB')
    return new_img, new_img_norm
def compare_publisher(amazon, marc):
    if 'publisher' in amazon and 'publishers' in marc:
        amazon_pub = amazon['publisher']
        norm_amazon = normalize(amazon_pub)
        for marc_pub in marc['publishers']:
            norm_marc = normalize(marc_pub)
            if norm_amazon == norm_marc:
                return ('publisher', 'match', 100)
            elif substr_match(norm_amazon, norm_marc):
                return ('publisher', 'occur within the other', 100)
            elif substr_match(norm_amazon.replace(' ', ''), norm_marc.replace(' ', '')):
                return ('publisher', 'occur within the other', 100)
            elif short_part_publisher_match(norm_amazon, norm_marc):
                return ('publisher', 'match', 100)
        return ('publisher', 'mismatch', -25)
    if 'publisher' not in amazon or 'publishers' not in marc:
        return ('publisher', 'either missing', 0)
def compare_publisher(e1, e2):
    if 'publishers' in e1 and 'publishers' in e2:
        for e1_pub in e1['publishers']:
            e1_norm = normalize(e1_pub)
            for e2_pub in e2['publishers']:
                e2_norm = normalize(e2_pub)
                if e1_norm == e2_norm:
                    return ('publisher', 'match', 100)
                elif substr_match(e1_norm, e2_norm):
                    return ('publisher', 'occur within the other', 100)
                elif substr_match(e1_norm.replace(' ', ''), e2_norm.replace(' ', '')):
                    return ('publisher', 'occur within the other', 100)
                elif short_part_publisher_match(e1_norm, e2_norm):
                    return ('publisher', 'match', 100)
        return ('publisher', 'mismatch', -25)
    if 'publishers' not in e1 or 'publishers' not in e2:
        return ('publisher', 'either missing', 0)
def index_and_count_text(self, text):
    """Increments word count by the number of words in text and returns a
    word index of the text, in (word, word_position) pairs with stop words
    removed.
    """
    text = normalize(text).split()
    self.total_word_count += len(text)
    return [(word, word_pos) for word_pos, word in enumerate(text)
            if word not in self.stop_words]
def amazon_title(amazon_first_parts, marc_first_parts):
    if normalize(amazon_first_parts[0]) not in titles:
        return False
    if compare_parts(marc_first_parts, amazon_first_parts[1:]):
        if verbose:
            print("match with Amazon title")
        return True
    if match_seq(marc_first_parts, amazon_first_parts[1:]):
        if verbose:
            print("partial match, with Amazon title")
        return True
    return False
def task_from_domain_problem(domain, problem):
    domain_name, domain_requirements, types, type_dict, constants, \
        predicates, predicate_dict, functions, actions, axioms = domain
    task_name, task_domain_name, task_requirements, objects, init, goal, \
        use_metric = problem

    assert domain_name == task_domain_name
    requirements = pddl.Requirements(sorted(set(
        domain_requirements.requirements + task_requirements.requirements)))
    objects = constants + objects
    check_for_duplicates(
        [o.name for o in objects],
        errmsg="error: duplicate object %r",
        finalmsg="please check :constants and :objects definitions")
    init += [pddl.Atom("=", (obj.name, obj.name)) for obj in objects]

    task = pddl.Task(domain_name, task_name, requirements, types, objects,
                     predicates, functions, init, goal, actions, axioms,
                     use_metric)
    normalize.normalize(task)
    return task
def main(): timer = timers.Timer() with timers.timing("Parsing", True): task = pddl_parser.open(task_filename=options.task, domain_filename=options.domain) with timers.timing("Normalizing task"): normalize.normalize(task) if options.generate_relaxed_task: # Remove delete effects. for action in task.actions: for index, effect in reversed(list(enumerate(action.effects))): if effect.literal.negated: del action.effects[index] sas_task = pddl_to_sas(task) dump_statistics(sas_task) with timers.timing("Writing output"): with open("output.sas", "w") as output_file: sas_task.output(output_file) print("Done! %s" % timer)
def match_name2(name1, name2):
    if name1 == name2:
        return True
    n1_normalized = normalize(name1)
    n2_normalized = normalize(name2)
    if n1_normalized == n2_normalized:
        return True
    n1_parts = split_parts(name1)
    n2_parts = split_parts(name2)
    if compare_parts(n1_parts, n2_parts):
        return True
    if match_seq(n1_parts, n2_parts):
        return True
    if marc_title(n1_parts, n2_parts):
        return True
    if marc_title(n2_parts, n1_parts):
        return True
    if amazon_title(n1_parts, n2_parts):
        return True
    if amazon_title(n2_parts, n1_parts):
        return True
    return False
def gen_data(n=300, dataset='clusters'):
    classes_n = 4
    if dataset == 'clusters':
        data, targets = datasets.make_classification(
            n_samples=n, n_features=2, n_informative=2, n_redundant=0,
            n_classes=4, class_sep=2.5, n_clusters_per_class=1)
    elif dataset == 'circles':
        data, targets = datasets.make_circles(
            n_samples=n, shuffle=True, noise=0.1, random_state=None, factor=0.1)
    elif dataset == 'moons':
        data, targets = datasets.make_moons(n_samples=n, shuffle=True, noise=0.2)
    train_data, valid_data, test_data = partition(data, 3)
    train_targets, valid_targets, test_targets = partition(targets, 3)
    train_data = normalize(train_data)
    test_data = normalize(test_data)
    valid_data = normalize(valid_data)
    train_set = to_one_hot_vector_targets(classes_n, zip(train_data, train_targets))
    valid_set = to_one_hot_vector_targets(classes_n, zip(valid_data, valid_targets))
    test_set = to_one_hot_vector_targets(classes_n, zip(test_data, test_targets))
    return train_set, valid_set, test_set
def main(transaction_file, product_file):
    # FEATURES
    features = [
        'brand', 'color', 'color_web', 'fit', 'heel_height', 'heel_shape',
        'main_group', 'material', 'material_inside', 'material_inner_sole',
        'material_outer_sole', 'removable_footbed', 'season', 'shaft_height',
        'shaft_width', 'subgroup'
    ]

    # DATAFRAMES
    transactions, products = read.all(transaction_file, product_file)

    # RETURNS
    # Get list of returns and number of returns.
    print("Counting number and percentage of returns...")
    no_transactions = counts.get(transactions, "transactions")
    returned_transactions, no_returns = returns.get(transactions, no_transactions)

    # CORRECT DATA
    # Unique article numbers are already used and commented out in correct.py
    print("Correcting data...")
    transactions, products = correct.all(transactions, products)

    # TRANSACTION & PRODUCT COUNT
    print("Counting transactions and products...")
    # Get number of transactions
    no_transactions = counts.get(transactions, "transactions")
    # Get average transactions per customer + max per customer
    unknown, avg_transactions, max_transactions = counts.transactions_per_customer(transactions)
    # Get number of products
    no_products = counts.get(products, "products")

    # PLOT FREQUENCIES PROPERTY VALUES
    # Make and save plots for product properties. Figures are saved in folder /output/.
    # print("Plotting frequencies for article property values...")
    # property_frequencies.plot(transactions, products)

    # Print lines x until y from all transactions.
    # print("Getting selected lines...")
    # selected_lines = lines.select(transactions, 0, 3)

    # NORMALIZE FEATURES
    print("Normalizing features...")
    products = normalize.normalize(products, ['shaft_height', 'shaft_width', 'heel_height'])

    # CALCULATE PRODUCT SIMILARITY
    print("Calculating product similarity...")
    get_similarity(products, features)
def compare_publisher(amazon, marc):
    if 'publishers' not in amazon or 'publishers' not in marc:
        return ('publishers', 'either missing', 0)
    assert 'publishers' in amazon and 'publishers' in marc
    for amazon_pub in amazon['publishers']:
        norm_amazon = normalize(amazon_pub)
        for marc_pub in marc['publishers']:
            norm_marc = normalize(marc_pub)
            if norm_amazon == norm_marc:
                return ('publishers', 'match', 100)
            # if re_press.sub('', norm_amazon) == re_press.sub('', norm_marc):
            #     return ('publishers', 'match', 100)
            if substr_match(norm_amazon, norm_marc):
                return ('publishers', 'occur within the other', 100)
            if substr_match(norm_amazon.replace(' ', ''), norm_marc.replace(' ', '')):
                return ('publishers', 'occur within the other', 100)
            if short_part_publisher_match(norm_amazon, norm_marc):
                return ('publishers', 'match', 100)
    return ('publishers', 'mismatch', -25)
def handle_archive(file: Path, root_folder: Path, dir_name: str):
    target_folder = root_folder / dir_name
    target_folder.mkdir(exist_ok=True)  # create the "archives" folder
    ext = Path(file).suffix
    folder_for_archive = normalize(file.name.replace(ext, ""))
    archive_folder = target_folder / folder_for_archive
    archive_folder.mkdir(exist_ok=True)  # create folder archives/name_archive
    try:
        shutil.unpack_archive(str(file.resolve()), str(archive_folder.resolve()))
    except shutil.ReadError:
        archive_folder.rmdir()  # on failure, remove the folder created for the archive
        return None
    file.unlink()  # on success, remove the original archive
def main(): print("-------------POND Translator-----------") args = parse_args() timer = timers.Timer() with timers.timing("Parsing", True): task = pddl.open(task_filename=args.task, domain_filename=args.domain) print() print("Problem Filename = " + args.task) print("Domain Filename = " + args.domain) print() with timers.timing("Normalizing task"): normalize.normalize(task) if args.generate_relaxed_task: # Remove delete effects. for action in task.actions: for index, effect in reversed(list(enumerate(action.effects))): if effect.literal.negated: del action.effects[index] sas_task = pddl_to_sas(task) dump_statistics(sas_task) if not sas_task is None: with timers.timing("Writing output"): with open( "..\\webapps\\LunaPlanner\\translator_output\\output.sas", "w") as output_file: sas_task.output(output_file) print() print("SAS file saved at: " + output_file.name) print("Done! %s" % timer)
def build_titles(title): """ Uses a full title to create normalized and short title versions. :param str title: Full title of an edition :rtype: dict :return: An expanded set of title variations """ normalized_title = normalize(title).lower() titles = [title, normalized_title] if title.find(' & ') != -1: t = title.replace(" & ", " and ") titles.append(t) titles.append(normalize(t)) t2 = [] for t in titles: if t.lower().startswith('the '): t2.append(t[4:]) elif t.lower().startswith('a '): t2.append(t[2:]) titles += t2 if re_amazon_title_paren.match(title): t2 = [] for t in titles: m = re_amazon_title_paren.match(t) if m: t2.append(m.group(1)) t2.append(normalize(m.group(1))) titles += t2 return { 'full_title': title, 'normalized_title': normalized_title, 'titles': titles, 'short_title': normalized_title[:25], }
def instantiate_task(task, check_infeasible=True, **kwargs):
    start_time = time()
    print()
    normalize.normalize(task)
    if FD_INSTANTIATE:
        relaxed_reachable, atoms, actions, axioms, reachable_action_params = \
            instantiate.explore(task)
    else:
        relaxed_reachable, atoms, actions, axioms = instantiate_domain(task, **kwargs)
        reachable_action_params = get_reachable_action_params(actions)
    # for atom in sorted(filter(lambda a: isinstance(a, pddl.Literal),
    #                           set(task.init) | set(atoms)),
    #                    key=lambda a: a.predicate):
    #     print(fact_from_fd(atom))
    # print(axioms)
    # for i, action in enumerate(sorted(actions, key=lambda a: a.name)):
    #     print(i, transform_action_args(pddl_from_instance(action), obj_from_pddl))
    print('Infeasible:', not relaxed_reachable)
    print('Instantiation time:', elapsed_time(start_time))
    if check_infeasible and not relaxed_reachable:
        return None
    goal_list = instantiate_goal(task.goal)
    return InstantiatedTask(task, atoms, actions, axioms,
                            reachable_action_params, goal_list)
def main(task=None, sas_fname=None, max_num_actions=float("inf"), pg_generator=None): timer = timers.Timer() if task is None: import options domain_fname = options.domain task_fname = options.task sas_fname = options.sas_file with timers.timing("Parsing", True): task = pddl_parser.open(domain_filename=domain_fname, task_filename=task_fname) with timers.timing("Normalizing task"): normalize.normalize(task) sas_task = pddl_to_sas(task, max_num_actions, pg_generator) dump_statistics(sas_task) with timers.timing("Writing output"): with open(sas_fname, "w") as output_file: sas_task.output(output_file) print("Done! %s" % timer)
def amazon_spaced_name(amazon, marc):
    len_amazon = len(amazon)
    if len_amazon != 30 and len_amazon != 31:
        return False
    m = re_amazon_space_name.search(amazon)
    if not m:
        return False
    amazon_surname = m.group(1)
    if normalize(amazon_surname) == normalize(marc):
        return True
    amazon_initials = m.group(2)
    m = re_marc_name.match(marc)
    if not m:
        return False
    marc_surname = m.group(1)
    if normalize(amazon_surname) != normalize(marc_surname):
        return False
    marc_first_parts = split_parts(m.group(2))
    amazon_first_parts = [x for x in amazon_initials]
    if compare_parts(marc_first_parts, amazon_first_parts):
        return True
    if match_seq(amazon_first_parts, marc_first_parts):
        return True
    return False
def forward(P, pi, sigma, mu, X):
    # P is the transition matrix.
    # L is the local evidence matrix: L[i, j] is the likelihood of
    # observation X[i] under hidden state j.
    # T is deduced from X, the observed data.
    # The resulting alpha array: alpha[n, j] is the probability that,
    # given observations 1..n, the hidden state z_n = j.
    K = P.shape[0]
    T = X.shape[0]
    # initialize
    alpha = np.zeros((T, K))
    Z = np.zeros(T)
    L = np.zeros((T, K))
    for i in range(T):
        for j in range(K):
            L[i, j] = mvnorm.pdf(X[i], mean=mu[j], cov=sigma[j])
    [alpha[0, :], Z[0]] = normalize(np.multiply(L[0, :], pi))
    for i in range(1, T):
        [alpha[i, :], Z[i]] = normalize(
            np.multiply(L[i, :], P.transpose().dot(alpha[i - 1, :])))
    return (alpha, Z)
def forback(P, pi, sigma, mu, X):
    T = X.shape[0]
    K = mu.shape[0]
    alpha, _ = forward(P, pi, sigma, mu, X)
    beta = backward(P, pi, sigma, mu, X)
    gamma = np.zeros((T, K))
    for i in range(T):
        for j in range(K):
            gamma[i][j] = alpha[i][j] * beta[i][j]
    for i in range(T):
        gamma[i, :], _ = normalize(gamma[i, :])
    return gamma
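# Editor's sketch (assumption, not from any of the projects above): forward()
# and forback() rely on a `normalize` helper that returns a vector rescaled to
# sum to 1 together with its normalizer. The original implementation is not
# shown in this collection; a minimal version consistent with those call sites
# could look like this:
def normalize(v):
    """Scale a non-negative vector so it sums to 1; return (scaled, normalizer)."""
    v = np.asarray(v, dtype=float)
    z = v.sum()
    if z == 0:
        # avoid division by zero: return the zero vector unchanged
        return v, z
    return v / z, z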
def readData():
    # assuming we already have db
    collection = db.newData
    cursor = collection.find({})
    # cursor = collection.find({"_id": ObjectId("60f3d5185b92c5b595de2d72")})
    names = []
    for document in cursor:
        damn = []
        for participant in document["A"]:
            addData(participant)
            damn.append((participant["championName"], participant["teamPosition"]))
            # print(participant["championName"], participant["teamPosition"])
        for participant in document["B"]:
            addData(participant)
            damn.append((participant["championName"], participant["teamPosition"]))
            # print(participant["championName"], participant["teamPosition"])
        names.append(damn)
    prep_x()
    normalized = np.transpose(np.array(normalize(total)))
    print(normalized.shape)
    print(len(labels))
    # X_train, X_test, y_train, y_test = train_test_split(normalized, labels, random_state=1, test_size=0.1)
    sc_X = StandardScaler()
    # X_trainscaled = sc_X.fit_transform(X_train)
    X_test = normalized
    y_test = labels
    X_testscaled = sc_X.fit_transform(X_test)
    newclf = joblib.load('my_model.pkl')
    # clf = MLPClassifier(hidden_layer_sizes=(256,128,64,32), activation="relu", random_state=1, verbose=True, max_iter=20).fit(X_trainscaled, y_train)
    y_pred = newclf.predict(X_testscaled)
    for i in range(len(y_pred)):
        if y_pred[i] != y_test[i]:
            print(names[i // 10])
            print("predicted %s to play in lane %s(predict) instead of lane %s(true)"
                  % (names[i // 10][i % 10], positionsDict[y_pred[i]], positionsDict[y_test[i]]))
            print((i % 10, y_pred[i], y_test[i]))
            # print("prediction is %r, true label is %r" % (y_pred[i], y_test[i]))
    print(newclf.score(X_testscaled, y_test))
def __call__(self, message: str) -> Result:
    check = normalize(message).lower()
    changed = False
    check = "".join([l for l in check if l in LETTERS])
    check = check.split(" ")
    tokens = []
    for word in self.words:
        if word in check:
            # pass IGNORECASE via the flags keyword; as a bare positional
            # argument it would be interpreted as re.sub's count parameter
            message = sub(word, "#" * len(word), message, flags=IGNORECASE)
            changed = True
            tokens.append(word)
    return Result(message, changed, tokens)
def timeEvolution(self, f, dt, k, v=[-1, 0, 1]):
    """
    when called, uses an initial wavefunction f (array-like), the timestep dt
    (float), the time sampling k (int), and the relative vertices in v
    (array-like) to calculate the full time-evolution of the wavefunction
    (array-like)
    """
    # calculate the time propagation operator if one is not provided
    U = self.propagationOperator(dt, v)
    # normalize the wavefunction
    f = normalize(self.dx, self.n, f)
    # prepare the full time-evolution array
    F = np.zeros([len(f), k], dtype='complex128')
    # initialize time-evolution array with initial wavefunction
    F[:, 0] = f
    # loop through time samples to generate time-evolution
    for i in xrange(1, k):
        F[:, i] = U * F[:, i - 1]
    return F
def main(transaction_file, product_file, n):
    # FEATURES
    features = [
        'brand', 'color', 'color_web', 'fit', 'heel_height', 'heel_shape',
        'main_group', 'material', 'material_inside', 'material_inner_sole',
        'material_outer_sole', 'removable_footbed', 'season', 'shaft_height',
        'shaft_width', 'subgroup'
    ]

    # READ DATA
    transactions, products = read.all(transaction_file, product_file)

    # Take smaller sample
    # transactions = transactions.sample(n)
    transactions = transactions[:n]

    # CORRECT DATA
    # Unique article numbers are already used and commented out in correct.py
    transactions, products = correct.all(transactions, products)

    # NORMALIZE FEATURES
    print("Normalizing features...")
    products = normalize.normalize(products, ['shaft_height', 'shaft_width', 'heel_height'])

    # GET REPEAT CUSTOMERS
    # Make series with amount of purchases for each customer.
    print("Getting repeat customers...")
    all_customers = transactions['customer'].value_counts()
    # List all customers with > 1 purchase.
    repeat_customers = all_customers[all_customers > 1]

    # Get similarity score with other purchases from customer.
    print("Calculating similarity from related purchases...")
    sims, article_ids = history_similarity(transactions, products, features,
                                           repeat_customers[1:])

    # Get average similarity score between products from sample.
    print("Calculating average similarity from sample...")
    gen_mean = average_similarity(products, features)

    # Get statistics
    print("Getting statistics...")
    sim_stats = get_stats(sims)

    # Save similarities, mean, median and standard deviation.
    print("Saving data...")
    save_results(article_ids, sims, sim_stats, gen_mean)
def eigenVectors(n, dx, A):
    """
    given sample numbering n (array-like), sample separation dx (array-like),
    and an operator A (array-like), calculates the eigenvalues (array-like)
    and the eigenvectors (array-like) and returns them in a list
    """
    # calculate total number of samples
    N = np.prod(n)
    # calculate the first sqrt(N) eigenvalues/vectors
    (u, v) = spsplin.eigsh(A, int(np.sqrt(N)), None, None, 'SM')
    # calculate shape of v
    m = np.shape(v)
    # for each eigenvector
    for i in xrange(0, m[1]):
        # normalize each eigenvector
        v[:, i] = normalize(dx, n, v[:, i])
    # return eigenvalues and eigenvectors in a list
    ev = [u, v]
    return ev
def handle_archive(filename: Path, target_folder: Path):
    # Create the folder for archives
    target_folder.mkdir(exist_ok=True, parents=True)
    # Create the folder the archive will be unpacked into:
    # take the file's suffix and strip it via replace(filename.suffix, '')
    folder_for_file = target_folder / \
        normalize(filename.name.replace(filename.suffix, ''))
    # create a folder for the archive, named after the file
    folder_for_file.mkdir(exist_ok=True, parents=True)
    try:
        shutil.unpack_archive(str(filename.resolve()), str(folder_for_file.resolve()))
    except shutil.ReadError:
        print(f'Not really an archive: {filename}!')
        folder_for_file.rmdir()
        return None
    filename.unlink()
def _make_property(self, review_dict_list: list) -> tuple:
    """review_dict's keys are 'date', 'star', 'vote', 'name', 'title' and 'review'"""
    reviews = OrderedDict()
    for idx, review_dict in enumerate(review_dict_list):
        review = normalize(review_dict['review'])
        reviews[idx] = review
    text_list = [[term.word for term in self._tokenizer.get_baseforms(review)]
                 for review in reviews.values()]
    dictionary = Dictionary(text_list)
    dictionary.filter_extremes(no_below=1, no_above=0.6)
    corpus = [dictionary.doc2bow(words) for words in text_list]
    return corpus, dictionary
def ae_predict(data):
    json_file = open('model.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    loaded_model.load_weights("model.h5")
    # ---------- get output from latent space
    data = normalize(data / 256)
    data = data / data.max()
    print("data range", data.min(), data.max())
    layer_name = 'latent_space'
    intermediate_layer_model = Model(inputs=loaded_model.input,
                                     outputs=loaded_model.get_layer(layer_name).output)
    intermediate_output = intermediate_layer_model.predict(data)
    return intermediate_output
def sineramp(sze=[256, 512], amp=12.5, wavelen=8, p=2):
    if len(sze) == 1:
        rows, cols = sze, sze
    elif len(sze) == 2:
        rows, cols = sze
    else:
        print('size must be a 1 or 2 element vector')

    '''Adjust width of image so that we have an integer number of cycles of
    the sinewave. This helps should one be using the test image to evaluate a
    cyclic colour map. However you will still see a slight cyclic
    discontinuity at the top of the image, though this will disappear at the
    bottom of the test image'''
    cycles = np.round(cols / wavelen)
    cols = cycles * wavelen

    # Sine wave
    x = np.arange(0, cols - 1)
    fx = amp * np.sin(1 / wavelen * 2 * np.pi * x)

    # Vertical modulating function
    A = (np.arange(rows - 1, 0, -1) / (rows - 1)) ** p
    # A = ([(rows-1):-1:0]/(rows-1))**p
    im = A[:, np.newaxis] * fx[np.newaxis, :]
    # print(im.shape)

    # Add ramp
    # ramp = meshgrid(0:(cols-1), 1:rows)/(cols-1)
    # note: meshgrid returns a list of arrays, so divide the ramp array itself
    ramp, _ = np.meshgrid(np.arange(0, cols - 1), np.arange(1, rows))
    ramp = ramp / (cols - 1)
    # print(ramp.shape)
    im = im + ramp * (255 - 2 * amp)
    # print(im.shape)

    # Now normalise each row so that it spans the full data range from 0 to 255.
    # This ensures that, at the lower edge of the image, the full colour map is
    # displayed. It also helps with the evaluation of cyclic colour maps though
    # a small cyclic discontinuity will remain at the top of the test image.
    for r in range(rows - 1):
        im[r, :] = normalize.normalize(im[r, :])
    im = im * 255
    return im
def save_gscale_img(fn, grayimg, mask_erode, mask_dilate):
    mask_erode = mask_erode.astype(float)
    mask_dilate = mask_dilate.astype(float)
    grayimg = grayimg.astype(float)
    # include all pixels 'on' in eroded binary shadow
    # exclude all pixels 'off' in dilated binary shadow
    img = grayimg
    img = np.maximum(img, mask_erode)
    img = np.minimum(img, mask_dilate)
    if do_normalize:
        img = normalize(img, figure_size, outsize)
        out = 1 - img
        out = out * 255
        out = out.astype(int)
    else:
        out = 1 - img
        out = out * 255
        out = out.astype(int)
        out = misc.imresize(out, (outsize, outsize))
    misc.imsave(fn, out)
def page2(request): if request.method == "POST": print(request.POST) str1 = "media/documents/" str2 = request.FILES['docfile'].name if os.path.isfile(str1 + str2): os.remove(str1 + str2) newdoc = Document(docfile=request.FILES['docfile']) newdoc.save() request.session['classes'] = request.POST['classes'] request.session['remove'] = request.POST['remove'] request.session['docfile'] = request.FILES['docfile'].name colonne = range(3) data = open_dataset(request.session['docfile']) normalizedData = normalize(data) stats = getStats(normalizedData) return render(request, 'kmeans/page2.html', { 'colonne': colonne, 'stats': stats }) else: return render(request, 'kmeans/page1.html', {})
def inertia_plot():
    from normalize import normalize
    import pandas as pd
    import numpy as np
    from sklearn.cluster import KMeans
    from sklearn.metrics import silhouette_samples, silhouette_score
    import matplotlib.pyplot as plt

    # data_set_1 = pd.read_csv("SongCSV.csv")
    new_data = normalize()
    data = new_data.iloc[:, 18:19]
    temp_1 = 10 ** 8
    x = np.linspace(1, 20, num=20)
    array = []
    for i in range(1, 21):
        kmeans = KMeans(n_clusters=i, max_iter=200, random_state=0,
                        verbose=0).fit(data)
        array.append(kmeans.inertia_)
    plt.plot(x, array, '-x')
    return plt.show()
def process_data(filename):
    '''Read data from file and prepare for processing'''
    # save cwd and change cwd
    saved_cwd = getcwd()
    chdir('datasets')
    # construct array from tab delimited file
    data, labels_list = to_array(filename)
    # create mapping of labels associated with dataset and key to index labels
    labeling = categorize_labels(labels_list)
    # normalize data
    normalizing = normalize(data)
    # restore saved cwd
    chdir(saved_cwd)
    return data, normalizing, labeling
def get_image(lung, deterministic):
    lung[lung == -2000] = 0
    # lung = lung - 1024
    truth = np.zeros_like(lung)
    outside = np.where(lung == 0, 1, 0)

    #######################
    outside = np.array(outside, dtype=np.float32)
    truth = np.array(np.round(truth), dtype=np.int64)
    outside = np.array(np.round(outside), dtype=np.int64)

    # Set label of outside pixels to -10
    truth = truth - (outside * 10)
    lung = lung * (1 - outside)
    lung = lung - outside * 3000

    if P.INPUT_SIZE > 0:
        lung = crop_or_pad(lung, INPUT_SIZE, -3000)
        truth = crop_or_pad(truth, OUTPUT_SIZE, 0)
        outside = crop_or_pad(outside, OUTPUT_SIZE, 1)
    else:
        out_size = output_size_for_input(lung.shape[1], P.DEPTH)
        # lung = crop_or_pad(lung, INPUT_SIZE, -1000)
        truth = crop_or_pad(truth, out_size, 0)
        outside = crop_or_pad(outside, out_size, 1)

    lung = normalize.normalize(lung)
    lung = np.expand_dims(np.expand_dims(lung, axis=0), axis=0)
    if P.ZERO_CENTER:
        lung = lung - P.MEAN_PIXEL
    truth = np.array(np.expand_dims(np.expand_dims(truth, axis=0), axis=0),
                     dtype=np.int64)
    return lung, truth
def save():
    json_from_client = flask.request.json
    entry = normalize(json_from_client['entry'])
    entry = add_class(entry)
    if json_from_client['edit'] == 'edit':
        index = json_from_client['index']
        try:
            res = es.index(index='temp_dict', doc_type='entry', id=index, body=entry)
            result = {'feedback': 'success', 'id': res['_id']}
        except:
            result = {'feedback': 'failure'}
    else:
        try:
            res = es.index(index='temp_dict', doc_type='entry', body=entry)
            result = {'feedback': 'success', 'id': res['_id']}
        except:
            result = {'feedback': 'failure'}
    return flask.jsonify(result=result)