def main():
    timer = timers.Timer()

    with timers.timing("Parsing", True):
        task = pddl_parser.open(task_filename=options.task, domain_filename=options.domain)
    with timers.timing("Normalizing task"):
        normalize.normalize(task)

    if options.generate_relaxed_task:
        # Remove delete effects.
        for action in task.actions:
            for index, effect in reversed(list(enumerate(action.effects))):
                if effect.literal.negated:
                    del action.effects[index]

    sas_task = pddl_to_sas(task)
    dump_statistics(sas_task)

    # Print PDDL if a transformation option is selected.
    if options.exp or options.evmdd:
        pddl_parser.print_pddl(options.domain, sas_task, task, [])
        print("done!")
        exit(0)

    with timers.timing("Writing output"):
        with open("output.sas", "w") as output_file:
            sas_task.output(output_file)
    print("Done! %s" % timer) 
Exemple #2
0
def main():
    args = parse_args()

    timer = timers.Timer()
    with timers.timing("Parsing", True):
        task = pddl.open(task_filename=args.task, domain_filename=args.domain)

    with timers.timing("Normalizing task"):
        normalize.normalize(task)

    if args.generate_relaxed_task:
        # Remove delete effects.
        for action in task.actions:
            for index, effect in reversed(list(enumerate(action.effects))):
                if effect.literal.negated:
                    del action.effects[index]

    sas_task = pddl_to_sas(task)
    dump_statistics(sas_task)

    if sas_task is not None:
        with timers.timing("Writing output"):
            with open("output.sas", "w") as output_file:
                sas_task.output(output_file)
        print("Done! %s" % timer)
Example #3
def build_titles(title):
    normalized_title = normalize(title).lower()
    titles = [title, normalized_title]
    if title.find(' & ') != -1:
        t = title.replace(" & ", " and ")
        titles.append(t)
        titles.append(normalize(t))
    t2 = []
    for t in titles:
        if t.lower().startswith('the '):
            t2.append(t[4:])
        elif t.lower().startswith('a '):
            t2.append(t[2:])
    titles += t2

    if re_amazon_title_paren.match(title):
        t2 = []
        for t in titles:
            m = re_amazon_title_paren.match(t)
            if m:
                t2.append(m.group(1))
                t2.append(normalize(m.group(1)))
        titles += t2

    return {
        'full_title':       title,
        'normalized_title': normalized_title,
        'titles':           titles,
        'short_title':      normalized_title[:25],
    }
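
A minimal usage sketch for build_titles above. The normalize helper and re_amazon_title_paren pattern below are crude stand-ins (assumptions for illustration; the real module defines its own):

import re

# Stand-ins for the module's helpers, for illustration only (assumed behavior).
re_amazon_title_paren = re.compile(r'^(.*?)\s*\(.+\)$')

def normalize(s):
    # crude stand-in: drop punctuation and collapse whitespace
    return re.sub(r'\s+', ' ', re.sub(r'[^\w\s]', ' ', s)).strip()

variants = build_titles("The Hobbit (Illustrated Edition)")
print(variants['short_title'])   # first 25 characters of the normalized title
print(len(variants['titles']))   # original, normalized, "The"-stripped and paren-stripped forms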
Example #4
def index(**kwargs):
    if request.args.get('submit') is not None:
        active = request.form.get('tabStatus')
        params = ['siteDest', 'siteSource']
        if active == 'page':
            params.append('title')
        return redirect(url_for('.index', **get_params(params)), code=c.REQUEST)

    normalize(['title'], kwargs)
    if not request.form.get('tabStatus', False):
        if kwargs.get('siteDest', False) and not kwargs.get('title', False):
            kwargs['tabStatus'] = 'content'
        else:
            kwargs['tabStatus'] = 'page'

    if not request.form.get('siteDest', False) and not request.form.get('siteSource', False):
        kwargs['siteDest'] = 'th'
        kwargs['siteSource'] = 'en'

    form = wikitranslator.form.getForm()(request.form, **kwargs)
    data = wikitranslator.model.Model(form=form)
    if form.validate(data):
        data.render()
    return render('index.html',
                  tool=__name__,
                  form=form,
                  data=data)
Example #5
def main():
    print("-------------POND Translator-----------")
    args = parse_args()

    timer = timers.Timer()
    with timers.timing("Parsing", True):
        task = pddl.open(task_filename=args.task, domain_filename=args.domain)
      
        print()
        print("Problem Filename = " + args.task)
        print("Domain Filename = " + args.domain)
        print()
        
    with timers.timing("Normalizing task"):
        normalize.normalize(task)

    if args.generate_relaxed_task:
        # Remove delete effects.
        for action in task.actions:
            for index, effect in reversed(list(enumerate(action.effects))):
                if effect.literal.negated:
                    del action.effects[index]

    sas_task = pddl_to_sas(task)
    dump_statistics(sas_task)

    if sas_task is not None:
        with timers.timing("Writing output"):
            with open("..\\webapps\\LunaPlanner\\translator_output\\output.sas", "w") as output_file:
                sas_task.output(output_file)
                
        print()
        print("SAS file saved at: " + output_file.name)
                
        print("Done! %s" % timer)
Example #6
def main():
    args = parse_args()

    timer = timers.Timer()
    with timers.timing("Parsing", True):
        task = pddl.open(task_filename=args.task,
                         domain_filename=args.domain,
                         addl_filename=args.addl)

    with timers.timing("Normalizing task"):
        normalize.normalize(task)

    if args.generate_relaxed_task:
        # Remove delete effects.
        for action in task.actions:
            for index, effect in reversed(list(enumerate(action.effects))):
                if effect.literal.negated:
                    del action.effects[index]

    output_file = args.output_file
    use_proto = args.use_proto
    print('Use Proto:', use_proto)

    sas_task = pddl_to_sas(task, args.agent_id, args.agent_url)
    dump_statistics(sas_task)

    with timers.timing("Writing output"):
        with open(output_file, "w") as output_file:
            if use_proto:
                sas_task.output_proto(output_file)
            else:
                sas_task.output(output_file)
    print("Done! %s" % timer)
Example #7
def marc_title(amazon_first_parts, marc_first_parts):
    # print('title found:', marc_first_parts[-1])
    if normalize(marc_first_parts[-1]) not in titles:
        return False
    if compare_parts(marc_first_parts[:-1], amazon_first_parts):
        if verbose:
            print("match with MARC end title")
        return True
    if normalize(amazon_first_parts[0]) in titles:
        if compare_parts(marc_first_parts[:-1], amazon_first_parts[1:]):
            if verbose:
                print("match, both with titles")
            return True
        if match_seq(marc_first_parts[:-1], amazon_first_parts[1:]):
            if verbose:
                print("partial match, both with titles")
            return True
    if match_seq(marc_first_parts[:-1], amazon_first_parts):
        if verbose:
            print("partial match with MARC end title")
        return True
    if match_seq(marc_first_parts, amazon_first_parts):
        if verbose:
            print("partial match with MARC end title")
    return False
Example #8
def main():
    options, args = parse_options()

    check_python_version(options.force_old_python)

    timer = timers.Timer()
    with timers.timing("Parsing", True):
        task = pddl.open()

    with timers.timing("Normalizing task"):
        normalize.normalize(task)

    if options.generate_relaxed_task:
        # Remove delete effects.
        for action in task.actions:
            for index, effect in reversed(list(enumerate(action.effects))):
                if effect.literal.negated:
                    del action.effects[index]

    sas_task = pddl_to_sas(task)
    dump_statistics(sas_task)

    with timers.timing("Writing output"):
        with open("output.sas", "w") as output_file:
            sas_task.output(output_file)
    print("Done! %s" % timer)
def main():
    timer = timers.Timer()
    with timers.timing("Parsing", True):
        task = pddl_parser.open(
            domain_filename=options.domain, task_filename=options.task)

    with timers.timing("Normalizing task"):
        normalize.normalize(task)

    if options.generate_relaxed_task:
        # Remove delete effects.
        for action in task.actions:
            for index, effect in reversed(list(enumerate(action.effects))):
                if effect.literal.negated:
                    del action.effects[index]

    sas_task = pddl_to_sas(task)
    dump_statistics(sas_task)

    with timers.timing("Writing output"):
        with open("output.sas", "w") as output_file:
            sas_task.output(output_file)
    print("Done! %s" % timer)
    global t1, t2
    t2 = time.time() - t2
    print('Time1:', t1)
    print('Time2:', t2)
Example #10
def compare_author_fields(e1_authors, e2_authors):
    for i in e1_authors:
        for j in e2_authors:
            if normalize(i['db_name']) == normalize(j['db_name']):
                return True
            if normalize(i['name']).strip('.') == normalize(j['name']).strip('.'):
                return True
    return False
def translate(task):
    normalize.normalize(task)
    prog = PrologProgram()
    translate_facts(prog, task)
    for conditions, effect in normalize.build_exploration_rules(task):
        prog.add_rule(Rule(conditions, effect))
    prog.normalize()
    prog.split_rules()
    return prog
Example #12
    def normalize(self):
        """
        Performs normalization. At this level, we do those normalizations that
        need both the pre & post syscall objects
        """
        import normalize

        for id in self:
            pre, post = self.getSyscallByID(id)

            normalize.normalize(pre, post)
Example #13
def flip_marc_name(marc):
    m = re_marc_name.match(marc)
    if not m:
        return remove_trailing_dot(marc)
    first_parts = split_parts(m.group(2))
    if normalize(first_parts[-1]) not in titles:
        # example: Eccles, David Eccles Viscount
        return remove_trailing_dot(m.group(2)) + ' ' + m.group(1)
    if len(first_parts) > 2 and normalize(first_parts[-2]) == normalize(m.group(1)):
        return u' '.join(first_parts[0:-1])
    return u' '.join(first_parts[:-1] + [m.group(1)])
Example #14
  def __init__(self, item_id, quantity, *options):
    """Store the descriptors of an order item in this object.

    Arguments:
    item_id -- the restaurant's numerical ID for the item
    quantity -- the quantity
    options -- any number of options to apply to the item

    """
    self.item_id = normalize(item_id, 'number')
    self.quantity = normalize(quantity, 'number')
    self.options = [normalize(option, 'number') for option in options]
  def get_delivery_check(self, restaurant_id, date_time, address):
    """Get data about a given restaurant, including whether it will deliver to
    the specified address at the specified time

    Arguments:
    restaurant_id -- Ordr.in's restaurant identifier
    date_time -- Either 'ASAP' or a datetime object in the future
    address -- the address to deliver to. Should be an ordrin.data.Address object

    """
    dt = normalize(date_time, 'datetime')
    restaurant_id = normalize(restaurant_id, 'number')
    return self._call_api('GET', ('dc', restaurant_id, dt, address.zip, address.city, address.addr))
Example #16
def index(**kwargs):
    if request.args.get("submit") is not None:
        return redirect(url_for(".index", **get_params(["title", "oldid"])), code=c.REQUEST)

    normalize(["title"], kwargs)
    form = dykchecker.form.getForm()(request.form, **kwargs)
    data = dykchecker.model.Model(form=form)
    if form.validate(data):
        data.render()
        return render("page.html", tool=__name__, form=form, data=data)

    else:
        return render("index.html", tool=__name__, form=form)
def translate(task):
    with timers.timing("Normalizing task"):
        normalize.normalize(task)
    with timers.timing("Generating Datalog program"):
        prog = PrologProgram()
        translate_facts(prog, task)
        for conditions, effect in normalize.build_exploration_rules(task):
            prog.add_rule(Rule(conditions, effect))
    with timers.timing("Normalizing Datalog program", block=True):
        # Using block=True because normalization can output some messages
        # in rare cases.
        prog.normalize()
        prog.split_rules()
    return prog
Example #18
  def update(self, login, first_name, last_name):
    """Updates account for the user associated with login. Throws a relevant exception
    on failure.

    Arguments:
    login -- the user's login information. Should be an ordrin.data.UserLogin object
    first_name -- the user's first name
    last_name -- the user's last name
    
    """
    data = {'email':login.email,
            'first_name':normalize(first_name, 'name'),
            'last_name':normalize(last_name, 'name'),
            'pw':login.password}
    return self._call_api('POST', ('u', login.email), login=login, data=data)
Example #19
  def set_credit_card(self, login, card_nick, credit_card):
    """Save an credit card by nickname for the logged in user
    Throws a relevant exception on failure

    Arguments:
    login -- the user's login information. Should be an ordrin.data.UserLogin object
    card_nick -- the nickname of the credit card to save
    credit_card -- the credit card to save. Should be an ordrin.data.CreditCard object
    
    """
    card_nick = normalize(card_nick, 'nick')
    data = credit_card.make_dict()
    data.update(login.make_dict())
    data['nick'] = card_nick
    data['phone'] = normalize(credit_card.phone, 'phone')
    return self._call_api('PUT', ('u', login.email, 'ccs', card_nick), login=login, data=data)
Example #20
 def do_wicked(self, newcontent, title, section):
     try:
         wicked = getWicked(self.context.getField('text'), self.context)
         wicked.section = section
         wicked.manageLink(newcontent, normalize(title))
     except ComponentLookupError:
         pass
def run(parser, args):
    # TODO: just change to 1 argument: --protocol -- with options [1,2,3,4]
    if args.protocol1:
        protocol=1
    elif args.protocol2:
        protocol=2
    elif args.protocol3:
        protocol=3
    elif args.protocol4:
        protocol=4
    elif args.protocol5:
        protocol=5
    elif args.protocol6:
        protocol=6
    
    late = normalize(latestage=args.latestage, protocol=protocol, earlystage=args.earlystage, pseudo=args.pseudo, bandwidth=args.bandwidth, quiet=args.quiet)

    if args.regions:
        # find maximums (summits) within regions given
        regions = BedTool(args.regions)
        
    else:
        # find peak regions by algorithm at top, then summits within them
        ## Read in states bedGraph, identify peaks
##        states = CovBed(args.states)
        regions = find_candidate_regions(args.states, thresh_state=1, merge1=10e3, minwidth=50e3, merge2=40e3, max_state_thresh=2, internal=0.8)

    # Convert CovBed object to BedTool object
    a = BedTool(StringIO.StringIO(late.get_bdg(bdg=late.count, collapsed=True)))
    ans = summits(a=a, b=regions)
    print(str(ans).strip())
 def _call_api(self, method, arguments, login=None, data=None):
   """Calls the api at the saved url and returns the return value as Python data structures.
   Rethrows any api error as a Python exception"""
   method = normalize(method, 'method')
   uri = '/'+('/'.join(urllib.quote_plus(str(arg)) for arg in arguments))
   full_url = self.base_url+uri
   headers = {}
   if self.key:
     headers['X-NAAMA-CLIENT-AUTHENTICATION'] = 'id="{}", version="1"'.format(self.key)
   if login:
     hash_code = sha256(''.join((login.password, login.email, uri))).hexdigest()
     headers['X-NAAMA-AUTHENTICATION'] = 'username="******", response="{}", version="1"'.format(login.email, hash_code)
   try:
     r = self._methods[method](full_url, data=data, headers=headers)
   except KeyError:
     raise error.request_method(method)
   r.raise_for_status()
   try:
     result = json.loads(r.text)
   except ValueError:
     raise ApiInvalidResponseError(r.text)
   if '_error' in result and result['_error']:
     if 'text' in result:
       raise errors.ApiError((result['msg'], result['text']))
     else:
       raise errors.ApiError(result['msg'])
   return result
Example #23
def experiment1(datasets, numClusters):

	###############---VECTOR CONFIGURATION---################

	# Configure data, resulting in a list of dictionaries (labels-->vectors)
	# There is a dictionary for each dataset, stored in the same order as in the datasets list
	# dataDictionaries = randomlyConfigureActiveColumns(datasets, 5, True)
	# OR:
	dataDictionaries = explicitlyConfigureActiveColumns(datasets, [0,1,2,3], True) 


	###############---VECTOR NORMALIZATION---################

	# At this point, have list of dictionaries. Each dictionary contains labels mapping to vectors.
	# All of the vectors are the same dimensionality, build in the way that we specified for configuration.
	normalizedDictionaries = []
	for d in dataDictionaries:
		# print d, "\n"
		normalizedDictionaries.append(normalize.normalize(d)) # THERE ARE ALSO OTHER WAYS TO NORMALIZE


	###################---CLUSTERING---#####################

	clusters = cluster.gonzalez(crunchDictionaryList(normalizedDictionaries), numClusters, distance.euclidean)


	##################---STORE RESULTS---####################

	# Prepare to write experiment file
	clusteringAlgorithmInfo = "gonzalez"
	distanceMeasurementInfo = "euclidean"
	vectorConfigurationInfo = "explicitly configured, same columns used across datasets, Indices used: [0,1,2,3]"

	writeFile(1, numClusters, clusteringAlgorithmInfo, distanceMeasurementInfo,vectorConfigurationInfo, clusters)
Example #24
def compare_title(amazon, marc):
    amazon_title = amazon['normalized_title'].lower()
    marc_title = normalize(marc['full_title']).lower()
    short = False
    if len(amazon_title) < 9 or len(marc_title) < 9:
        short = True

    if not short:
        for a in amazon['titles']:
            for m in marc['titles']:
                if a == m:
                    return ('full-title', 'exact match', 600)

        for a in amazon['titles']:
            for m in marc['titles']:
                if substr_match(a, m):
                    return ('full-title', 'contained within other title', 350)

    max_score = 0
    for a in amazon['titles']:
        for m in marc['titles']:
            percent, ordered = keyword_match(a, m)
            score = percent * 450
            if ordered:
                score += 50
            if score and score > max_score:
                max_score = score
    if max_score:
        return ('full-title', 'keyword match', max_score)
    elif short:
        return ('full-title', 'shorter than 9 characters', 0)
    else:
        return ('full-title', 'mismatch', -600)
Example #25
def load_data(tup): #filename, coordinates, labels tuple
    size = P.INPUT_SIZE
    data = []
    labels = []

    images = dataset_3D.giveSubImage(tup[0],tup[1],size)
    labels += map(int,tup[2])
    data += images[:]

    data = normalize.normalize(np.array(data, dtype=np.float32))

    if P.ZERO_CENTER:
        data -= P.MEAN_PIXEL

    result = list(zip([tup[0]]*len(labels), np.array(data, dtype=np.float32),
                      np.array(labels, dtype=np.int32)))

    if P.AUGMENT and P.AUGMENTATION_PARAMS['flip']:
        augmentation_extra = []

        for filename, image, label in result:
            if label == 1:
                flipped_images = augment.get_all_flips_3d(image)
                np.random.shuffle(flipped_images)
                flipped_images = flipped_images[:1]  # select 1 random image of the 7 possible flips
                n_new = len(flipped_images)

                augmentation_extra += zip([filename]*n_new, flipped_images, [label]*n_new)
            else: #For false candidates take one flip combination at random :)
                flip_option = augment.OPTS[np.random.randint(8)]
                augment.flip_given_axes(image, flip_option)

        result += augmentation_extra


    return result
Example #26
def make_bigrams(text):
    #tokens = WordPunctTokenizer().tokenize(text)
    tokens = normalize(text).split(' ')
    tokens = [t for t in tokens if len(t)]
    #tokens = filter(lambda t: len(t), map(normalize, tokens))
    tokens = [t for t in tokens if t not in STOPWORDS]
    bigrams = list(tokens_to_bigrams(tokens))
    return bigrams
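
make_bigrams above relies on a tokens_to_bigrams helper and a STOPWORDS set that are not shown. A minimal, self-contained sketch of the same idea, with an assumed normalize and a toy stop-word list:

import re

STOPWORDS = {'the', 'a', 'of', 'and'}  # toy list, not the original

def normalize(text):
    # assumed stand-in: lower-case and keep only letters, digits and spaces
    return re.sub(r'[^a-z0-9 ]+', ' ', text.lower())

def tokens_to_bigrams(tokens):
    # yield consecutive token pairs
    for first, second in zip(tokens, tokens[1:]):
        yield (first, second)

def make_bigrams(text):
    tokens = [t for t in normalize(text).split(' ') if t and t not in STOPWORDS]
    return list(tokens_to_bigrams(tokens))

print(make_bigrams("The quick brown fox, and the lazy dog"))
# [('quick', 'brown'), ('brown', 'fox'), ('fox', 'lazy'), ('lazy', 'dog')]
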
def cropAndResize(FILENAME, newSize):
    img = Image.open(FILENAME).convert('RGB')
    arr = np.array(np.asarray(img).astype('float'))
    upperLeft, bottomRight = radius.get_corners(radius.get_radius(img, arr), img.size)

    #crop array to space we want
    arr = arr[upperLeft[1]:bottomRight[1], upperLeft[0]:bottomRight[0]]
    #make new image
    new_img = Image.fromarray(arr.astype('uint8'),'RGB')
    new_img = new_img.resize((newSize,newSize))

    # get the resized image into an array, normalize it, then rebuild the image
    arr = np.array(np.asarray(new_img).astype('float'))
    normalize(arr)
    new_img_norm = Image.fromarray(arr.astype('uint8'), 'RGB')
    
    return new_img, new_img_norm
Example #28
def compare_publisher(amazon, marc):
    if 'publisher' in amazon and 'publishers' in marc:
        amazon_pub = amazon['publisher']
        norm_amazon = normalize(amazon_pub)
        for marc_pub in marc['publishers']:
            norm_marc = normalize(marc_pub)
            if norm_amazon == norm_marc:
                return ('publisher', 'match', 100)
            elif substr_match(norm_amazon, norm_marc):
                return ('publisher', 'occur within the other', 100)
            elif substr_match(norm_amazon.replace(' ', ''), norm_marc.replace(' ', '')):
                return ('publisher', 'occur within the other', 100)
            elif short_part_publisher_match(norm_amazon, norm_marc):
                return ('publisher', 'match', 100)
        return ('publisher', 'mismatch', -25)

    if 'publisher' not in amazon or 'publishers' not in marc:
        return ('publisher', 'either missing', 0)
Example #29
def compare_publisher(e1, e2):
    if 'publishers' in e1 and 'publishers' in e2:
        for e1_pub in e1['publishers']:
            e1_norm = normalize(e1_pub)
            for e2_pub in e2['publishers']:
                e2_norm = normalize(e2_pub)
                if e1_norm == e2_norm:
                    return ('publisher', 'match', 100)
                elif substr_match(e1_norm, e2_norm):
                    return ('publisher', 'occur within the other', 100)
                elif substr_match(e1_norm.replace(' ', ''), e2_norm.replace(' ', '')):
                    return ('publisher', 'occur within the other', 100)
                elif short_part_publisher_match(e1_norm, e2_norm):
                    return ('publisher', 'match', 100)
        return ('publisher', 'mismatch', -25)

    if 'publishers' not in e1 or 'publishers' not in e2:
        return ('publisher', 'either missing', 0)
Example #30
 def index_and_count_text(self, text):
     """Increments word count by the number of words in text and returns
     a word index of the text, in (word, word_position) pairs with
     stop words removed.
     """
     text = normalize(text).split()
     self.total_word_count += len(text)
     return [(word, word_pos) for word_pos, word in enumerate(text)
         if word not in self.stop_words]
Example #31
def amazon_title(amazon_first_parts, marc_first_parts):
    if normalize(amazon_first_parts[0]) not in titles:
        return False
    if compare_parts(marc_first_parts, amazon_first_parts[1:]):
        if verbose:
            print("match with Amazon title")
        return True
    if match_seq(marc_first_parts, amazon_first_parts[1:]):
        if verbose:
            print("partial match, with Amazon title")
        return True
    return False
Example #32
def task_from_domain_problem(domain, problem):
    domain_name, domain_requirements, types, type_dict, constants, \
        predicates, predicate_dict, functions, actions, axioms = domain
    task_name, task_domain_name, task_requirements, objects, init, goal, use_metric = problem

    assert domain_name == task_domain_name
    requirements = pddl.Requirements(
        sorted(
            set(domain_requirements.requirements +
                task_requirements.requirements)))
    objects = constants + objects
    check_for_duplicates(
        [o.name for o in objects],
        errmsg="error: duplicate object %r",
        finalmsg="please check :constants and :objects definitions")
    init += [pddl.Atom("=", (obj.name, obj.name)) for obj in objects]

    task = pddl.Task(domain_name, task_name, requirements, types, objects,
                     predicates, functions, init, goal, actions, axioms,
                     use_metric)
    normalize.normalize(task)
    return task
def main():
    timer = timers.Timer()
    with timers.timing("Parsing", True):
        task = pddl_parser.open(task_filename=options.task,
                                domain_filename=options.domain)

    with timers.timing("Normalizing task"):
        normalize.normalize(task)
    if options.generate_relaxed_task:
        # Remove delete effects.
        for action in task.actions:
            for index, effect in reversed(list(enumerate(action.effects))):
                if effect.literal.negated:
                    del action.effects[index]

    sas_task = pddl_to_sas(task)
    dump_statistics(sas_task)

    with timers.timing("Writing output"):
        with open("output.sas", "w") as output_file:
            sas_task.output(output_file)
    print("Done! %s" % timer)
Example #34
def match_name2(name1, name2):
    if name1 == name2:
        return True
    n1_normalized = normalize(name1)
    n2_normalized = normalize(name2)
    if n1_normalized == n2_normalized:
        return True
    n1_parts = split_parts(name1)
    n2_parts = split_parts(name2)
    if compare_parts(n1_parts, n2_parts):
        return True
    if match_seq(n1_parts, n2_parts):
        return True
    if marc_title(n1_parts, n2_parts):
        return True
    if marc_title(n2_parts, n1_parts):
        return True
    if amazon_title(n1_parts, n2_parts):
        return True
    if amazon_title(n2_parts, n1_parts):
        return True
    return False
def gen_data(n=300, dataset='clusters'):
    classes_n = 4
    if dataset == 'clusters':
        data, targets = datasets.make_classification(n_samples=n, n_features=2, n_informative=2, n_redundant=0,
                                                     n_classes=4, class_sep=2.5, n_clusters_per_class=1)
    elif dataset == 'circles':
        data, targets = datasets.make_circles(
            n_samples=n, shuffle=True, noise=0.1, random_state=None, factor=0.1)
    elif dataset == 'moons':
        data, targets = datasets.make_moons(n_samples=n, shuffle=True, noise=0.2)

    train_data, valid_data, test_data = partition(data, 3)
    train_targets, valid_targets, test_targets = partition(targets, 3)

    train_data = normalize(train_data)
    test_data = normalize(test_data)
    valid_data = normalize(valid_data)

    train_set = to_one_hot_vector_targets(classes_n, zip(train_data, train_targets))
    valid_set = to_one_hot_vector_targets(classes_n, zip(valid_data, valid_targets))
    test_set = to_one_hot_vector_targets(classes_n, zip(test_data, test_targets))

    return train_set, valid_set, test_set
Example #36
def main(transaction_file, product_file):
    # FEATURES
    features = [
        'brand', 'color', 'color_web', 'fit', 'heel_height', 'heel_shape',
        'main_group', 'material', 'material_inside', 'material_inner_sole',
        'material_outer_sole', 'removable_footbed', 'season', 'shaft_height',
        'shaft_width', 'subgroup'
    ]

    # DATAFRAMES
    transactions, products = read.all(transaction_file, product_file)

    # RETURNS
    # Get list of returns and number of returns.
    print("Counting number and percentage of returns...")
    no_transactions = counts.get(transactions, "transactions")
    returned_transactions, no_returns = returns.get(transactions,
                                                    no_transactions)

    # CORRECT DATA
    # Unique article numbers are already used and commented out in correct.py
    print("Correcting data...")
    transactions, products = correct.all(transactions, products)

    # TRANSACTION & PRODUCT COUNT
    print("Counting transactions and products...")
    # Get number of transactions
    no_transactions = counts.get(transactions, "transactions")
    # Get average transactions per customer + max per customer
    unknown, avg_transactions, max_transactions = counts.transactions_per_customer(
        transactions)
    # Get number of products
    no_products = counts.get(products, "products")

    # PLOT FREQUENCIES PROPERTY VALUES
    # Make and save plots for product properties. Figures are saved in folder /output/.
    #print("Plotting frequencies for article property values...")
    #property_frequencies.plot(transactions, products)

    # Print lines x until y from all transactions.
    #print("Getting selected lines...")
    #selected_lines = lines.select(transactions, 0, 3)

    # NORMALIZE FEATURES
    print("Normalizing features...")
    products = normalize.normalize(
        products, ['shaft_height', 'shaft_width', 'heel_height'])
    # CALCULATE PRODUCT SIMILARITY
    print("Calculating product similarity...")
    get_similarity(products, features)
Example #37
def compare_publisher(amazon, marc):
    if 'publishers' not in amazon or 'publishers' not in marc:
        return ('publishers', 'either missing', 0)

    assert 'publishers' in amazon and 'publishers' in marc
    for amazon_pub in amazon['publishers']:
        norm_amazon = normalize(amazon_pub)
        for marc_pub in marc['publishers']:
            norm_marc = normalize(marc_pub)
            if norm_amazon == norm_marc:
                return ('publishers', 'match', 100)


#            if re_press.sub('', norm_amazon) == re_press.sub('', norm_marc):
#                return ('publishers', 'match', 100)
            if substr_match(norm_amazon, norm_marc):
                return ('publishers', 'occur within the other', 100)
            if substr_match(norm_amazon.replace(' ', ''),
                            norm_marc.replace(' ', '')):
                return ('publishers', 'occur within the other', 100)
            if short_part_publisher_match(norm_amazon, norm_marc):
                return ('publishers', 'match', 100)
    return ('publishers', 'mismatch', -25)
Example #38
def handle_archive(file: Path, root_folder: Path, dir_name: str):
    target_folder = root_folder / dir_name
    target_folder.mkdir(exist_ok=True)  # create folder archives
    ext = Path(file).suffix
    folder_for_archive = normalize(file.name.replace(ext, ""))
    archive_folder = target_folder / folder_for_archive
    archive_folder.mkdir(exist_ok=True)  # create folder archives/name_archive
    try:
        shutil.unpack_archive(str(file.resolve()),
                              str(archive_folder.resolve()))
    except shutil.ReadError:
        archive_folder.rmdir()  # unpacking failed: remove the folder created for the archive
        return None
    file.unlink()  # unpacked successfully: remove the original archive file
Example #39
def main():
    print("-------------POND Translator-----------")
    args = parse_args()

    timer = timers.Timer()
    with timers.timing("Parsing", True):
        task = pddl.open(task_filename=args.task, domain_filename=args.domain)

        print()
        print("Problem Filename = " + args.task)
        print("Domain Filename = " + args.domain)
        print()

    with timers.timing("Normalizing task"):
        normalize.normalize(task)

    if args.generate_relaxed_task:
        # Remove delete effects.
        for action in task.actions:
            for index, effect in reversed(list(enumerate(action.effects))):
                if effect.literal.negated:
                    del action.effects[index]

    sas_task = pddl_to_sas(task)
    dump_statistics(sas_task)

    if sas_task is not None:
        with timers.timing("Writing output"):
            with open(
                    "..\\webapps\\LunaPlanner\\translator_output\\output.sas",
                    "w") as output_file:
                sas_task.output(output_file)

        print()
        print("SAS file saved at: " + output_file.name)

        print("Done! %s" % timer)
Example #40
def build_titles(title):
    """
    Uses a full title to create normalized and short title versions.

    :param str title: Full title of an edition
    :rtype: dict
    :return: An expanded set of title variations
    """
    normalized_title = normalize(title).lower()
    titles = [title, normalized_title]
    if title.find(' & ') != -1:
        t = title.replace(" & ", " and ")
        titles.append(t)
        titles.append(normalize(t))
    t2 = []
    for t in titles:
        if t.lower().startswith('the '):
            t2.append(t[4:])
        elif t.lower().startswith('a '):
            t2.append(t[2:])
    titles += t2

    if re_amazon_title_paren.match(title):
        t2 = []
        for t in titles:
            m = re_amazon_title_paren.match(t)
            if m:
                t2.append(m.group(1))
                t2.append(normalize(m.group(1)))
        titles += t2

    return {
        'full_title': title,
        'normalized_title': normalized_title,
        'titles': titles,
        'short_title': normalized_title[:25],
    }
def instantiate_task(task, check_infeasible=True, **kwargs):
    start_time = time()
    print()
    normalize.normalize(task)
    if FD_INSTANTIATE:
        relaxed_reachable, atoms, actions, axioms, reachable_action_params = instantiate.explore(
            task)
    else:
        relaxed_reachable, atoms, actions, axioms = instantiate_domain(
            task, **kwargs)
        reachable_action_params = get_reachable_action_params(actions)
    #for atom in sorted(filter(lambda a: isinstance(a, pddl.Literal), set(task.init) | set(atoms)),
    #                   key=lambda a: a.predicate):
    #    print(fact_from_fd(atom))
    #print(axioms)
    #for i, action in enumerate(sorted(actions, key=lambda a: a.name)):
    #    print(i, transform_action_args(pddl_from_instance(action), obj_from_pddl))
    print('Infeasible:', not relaxed_reachable)
    print('Instantiation time:', elapsed_time(start_time))
    if check_infeasible and not relaxed_reachable:
        return None
    goal_list = instantiate_goal(task.goal)
    return InstantiatedTask(task, atoms, actions, axioms,
                            reachable_action_params, goal_list)
Example #42
def main(task=None,
         sas_fname=None,
         max_num_actions=float("inf"),
         pg_generator=None):
    timer = timers.Timer()
    if task is None:
        import options
        domain_fname = options.domain
        task_fname = options.task
        sas_fname = options.sas_file
        with timers.timing("Parsing", True):
            task = pddl_parser.open(domain_filename=domain_fname,
                                    task_filename=task_fname)

    with timers.timing("Normalizing task"):
        normalize.normalize(task)

    sas_task = pddl_to_sas(task, max_num_actions, pg_generator)
    dump_statistics(sas_task)

    with timers.timing("Writing output"):
        with open(sas_fname, "w") as output_file:
            sas_task.output(output_file)
    print("Done! %s" % timer)
Example #43
def amazon_spaced_name(amazon, marc):
    len_amazon = len(amazon)
    if len_amazon != 30 and len_amazon != 31:
        return False
    m = re_amazon_space_name.search(amazon)
    if not m:
        return False
    amazon_surname = m.group(1)
    if normalize(amazon_surname) == normalize(marc):
        return True
    amazon_initals = m.group(2)
    m = re_marc_name.match(marc)
    if not m:
        return False
    marc_surname = m.group(1)
    if normalize(amazon_surname) != normalize(marc_surname):
        return False
    marc_first_parts = split_parts(m.group(2))
    amazon_first_parts = [x for x in amazon_initals]
    if compare_parts(marc_first_parts, amazon_first_parts):
        return True
    if match_seq(amazon_first_parts, marc_first_parts):
        return True
    return False
Example #44
def forward(P, pi, sigma, mu, X):
    # P is the transition matrix.
    # L is the local evidence matrix: L[i, j] is the likelihood of observation i under state j.
    # X is the observed data; T (number of time steps) is deduced from X.
    # The resulting alpha array: alpha[n, j] is the probability that hidden state z_n = j
    # given observations 1..n.
    K = P.shape[0]
    T = X.shape[0]

    #initialize
    alpha = np.zeros((T, K))
    Z = np.zeros(T)
    L = np.zeros((T, K))
    for i in range(T):
        for j in range(K):
            L[i, j] = mvnorm.pdf(X[i], mean=mu[j], cov=sigma[j])

    [alpha[0, :], Z[0]] = normalize(np.multiply(L[0, :], pi))
    for i in range(1, T):
        [alpha[i, :], Z[i]
         ] = normalize(np.multiply(L[i, :],
                                   P.transpose().dot(alpha[i - 1, :])))

    return (alpha, Z)
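
A small worked example run against the forward definition above, assuming normalize divides a vector by its sum and returns (vector, sum), and that mvnorm is scipy.stats.multivariate_normal (both are assumptions about the missing imports):

import numpy as np
from scipy.stats import multivariate_normal as mvnorm

def normalize(v):
    # assumed helper: scale v to sum to 1 and also return the normalizer
    z = v.sum()
    return v / z, z

P = np.array([[0.9, 0.1],              # state-transition matrix
              [0.2, 0.8]])
pi = np.array([0.5, 0.5])              # initial state distribution
mu = np.array([[0.0], [3.0]])          # per-state Gaussian means (1-D observations)
sigma = np.array([[[1.0]], [[1.0]]])   # per-state covariances
X = np.array([[0.1], [0.2], [2.9], [3.1]])  # observed sequence

alpha, Z = forward(P, pi, sigma, mu, X)
print(alpha)            # filtered state probabilities, one row per time step
print(np.log(Z).sum())  # log-likelihood of the observation sequence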
Example #45
def forback(P, pi, sigma, mu, X):
    T = X.shape[0]
    K = mu.shape[0]
    alpha, _ = forward(P, pi, sigma, mu, X)
    beta = backward(P, pi, sigma, mu, X)
    beta.shape

    gamma = np.zeros((T, K))
    for i in range(T):
        for j in range(K):
            gamma[i][j] = alpha[i][j] * beta[i][j]

    for i in range(T):
        gamma[i, :], _ = normalize(gamma[i, :])
    return gamma
Example #46
def readData():

    #assuming we already have db

    collection = db.newData
    cursor = collection.find({})
    #cursor = collection.find({"_id":ObjectId("60f3d5185b92c5b595de2d72")})
    names = []
    for document in cursor:
        damn = []
        for participant in document["A"]:
            addData(participant)
            damn.append(
                (participant["championName"], participant["teamPosition"]))
            #print(participant["championName"],participant["teamPosition"])
        for participant in document["B"]:
            addData(participant)
            damn.append(
                (participant["championName"], participant["teamPosition"]))
            #print(participant["championName"],participant["teamPosition"])
        names.append(damn)
    prep_x()

    normalized = np.transpose(np.array(normalize(total)))
    print(normalized.shape)
    print(len(labels))

    #X_train, X_test, y_train, y_test = train_test_split(normalized,labels,random_state=1, test_size=0.1)
    sc_X = StandardScaler()
    #X_trainscaled=sc_X.fit_transform(X_train)

    X_test = normalized
    y_test = labels

    X_testscaled = sc_X.fit_transform(X_test)
    newclf = joblib.load('my_model.pkl')
    #clf = MLPClassifier(hidden_layer_sizes=(256,128,64,32),activation="relu",random_state=1,verbose=True,max_iter=20).fit(X_trainscaled, y_train)
    y_pred = newclf.predict(X_testscaled)
    for i in range(len(y_pred)):
        if y_pred[i] != y_test[i]:
            print(names[i // 10])
            print(
                "predicted %s to play in lane %s(predict) instead of lane %s(true)"
                % (names[i // 10][i % 10], positionsDict[y_pred[i]],
                   positionsDict[y_test[i]]))
            print((i % 10, y_pred[i], y_test[i]))
        #print("prediction is %r, true label is %r" % (y_pred[i],y_test[i]))
    print(newclf.score(X_testscaled, y_test))
Example #47
    def __call__(self, message: str) -> Result:
        check = normalize(message).lower()
        changed = False

        check = "".join([l for l in check if l in LETTERS])

        check = check.split(" ")

        tokens = []

        for word in self.words:
            if word in check:
                message = sub(word, "#" * len(word), message, flags=IGNORECASE)
                changed = True
                tokens.append(word)

        return Result(message, changed, tokens)
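
A compact, self-contained sketch of the censoring pattern used in __call__ above. Result, LETTERS and normalize are stand-ins here (assumptions), and the surrounding class is not reproduced:

import re
from collections import namedtuple
from string import ascii_lowercase

Result = namedtuple('Result', 'message changed tokens')  # assumed shape
LETTERS = set(ascii_lowercase + ' ')                     # assumed alphabet

def normalize(text):
    # stand-in: lower-case and keep only letters and spaces
    return ''.join(c for c in text.lower() if c in LETTERS)

def censor(message, words):
    check = normalize(message).split(' ')
    changed, tokens = False, []
    for word in words:
        if word in check:
            message = re.sub(word, '#' * len(word), message, flags=re.IGNORECASE)
            changed = True
            tokens.append(word)
    return Result(message, changed, tokens)

print(censor("Badger badger mushroom", ["badger"]))
# Result(message='###### ###### mushroom', changed=True, tokens=['badger'])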
Example #48
 def timeEvolution(self, f, dt, k, v=[-1, 0, 1]):
     """ when called, uses an initial wavefunction f (array-like), the
         timestep dt (float), the time sampling k (int), and the relative
         vertices in v (array-like) to calculate the full
         time-evolution of the wavefunction (array-like) """
     # calculate the time propagation operator if one is not provided
     U = self.propagationOperator(dt, v)
     # normalize the wavefunction
     f = normalize(self.dx, self.n, f)
     # prepare the full time-evolution array
     F = np.zeros([len(f), k], dtype='complex128')
     # initialize time-evolution array with initial wavefunction
     F[:, 0] = f
     # loop through time samples to generate time-evolution
     for i in xrange(1, k):
         F[:, i] = U * F[:, i - 1]
     return F
Example #49
def main(transaction_file, product_file, n):
    # FEATURES
    features = [
        'brand', 'color', 'color_web', 'fit', 'heel_height', 'heel_shape',
        'main_group', 'material', 'material_inside', 'material_inner_sole',
        'material_outer_sole', 'removable_footbed', 'season', 'shaft_height',
        'shaft_width', 'subgroup'
    ]

    # READ DATA
    transactions, products = read.all(transaction_file, product_file)
    # Take smaller sample
    #transactions = transactions.sample(n)
    transactions = transactions[:n]
    # CORRECT DATA
    # Unique article numbers are already used and commented out in correct.py
    transactions, products = correct.all(transactions, products)

    # NORMALIZE FEATURES
    print("Normalizing features...")
    products = normalize.normalize(
        products, ['shaft_height', 'shaft_width', 'heel_height'])

    # GET REPEAT CUSTOMERS
    # Make series with amount of purchases for each customer.
    print("Getting repeat customers...")
    all_customers = transactions['customer'].value_counts()
    # List all customers with > 1 purchase.
    repeat_customers = all_customers[all_customers > 1]

    # Get similarity score with other purchases from customer.
    print("Calculating similarity from related purchases...")
    sims, article_ids = history_similarity(transactions, products, features,
                                           repeat_customers[1:])
    # Get average similarity score between products from sample.
    print("Calculating average similarity from sample...")
    gen_mean = average_similarity(products, features)

    # Get statistics
    print("Getting statistics...")
    sim_stats = get_stats(sims)

    # Save similarities, mean, median and standard deviation.
    print("Saving data...")
    save_results(article_ids, sims, sim_stats, gen_mean)
def eigenVectors(n, dx, A):
    """ given sample numbering n (array-like), sample separation dx
        (array-like), and an operator A (array-like), calculates the
        eigenvalues (array-like) and the eigenvectors (array-like) and returns
        them in a list """
    # calculate total number of samples
    N = np.prod(n)
    # calculate the first sqrt(N) eigenvalues/vectors
    (u, v) = spsplin.eigsh(A, int(np.sqrt(N)), None, None, 'SM')
    # calculate shape of v
    m = np.shape(v)
    # for each eigenvector
    for i in xrange(0, m[1]):
        # normalize each eigenvector
        v[:, i] = normalize(dx, n, v[:, i])
    # return eigenvalues and eigenvectors in a list
    ev = [u, v]
    return ev
Example #51
def handle_archive(filename: Path, target_folder: Path):
    # create the folder for archives
    target_folder.mkdir(exist_ok=True, parents=True)
    # create the folder the archive will be unpacked into:
    # take the file's suffix and strip it with replace(filename.suffix, '')
    folder_for_file = target_folder / \
        normalize(filename.name.replace(filename.suffix, ''))
    # create a folder for the archive, named after the file

    folder_for_file.mkdir(exist_ok=True, parents=True)
    try:
        shutil.unpack_archive(str(filename.resolve()),
                              str(folder_for_file.resolve()))
    except shutil.ReadError:
        print(f'This is not an archive: {filename}!')
        folder_for_file.rmdir()
        return None
    filename.unlink()
    def _make_property(self, review_dict_list: list) -> tuple:
        """
        review_dict's keys are 'date', 'star', 'vote', 'name', 'title' and 'review' 
        """
        reviews = OrderedDict()
        for idx, review_dict in enumerate(review_dict_list):
            review = normalize(review_dict['review'])
            reviews[idx] = review

        text_list = [[
            term.word for term in self._tokenizer.get_baseforms(review)
        ] for review in reviews.values()]

        dictionary = Dictionary(text_list)
        dictionary.filter_extremes(no_below=1, no_above=0.6)
        corpus = [dictionary.doc2bow(words) for words in text_list]

        return corpus, dictionary
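
The corpus/dictionary pair built by _make_property is the shape gensim's topic models expect. A minimal, self-contained sketch of feeding such a pair into LDA; the token lists here are toy data standing in for the tokenized reviews:

from gensim.corpora import Dictionary
from gensim.models import LdaModel

# toy token lists standing in for tokenized reviews
text_list = [['battery', 'life', 'great'],
             ['screen', 'bright', 'great'],
             ['battery', 'drains', 'fast']]

dictionary = Dictionary(text_list)
corpus = [dictionary.doc2bow(words) for words in text_list]

lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=2, random_state=0)
for topic_id, terms in lda.print_topics(num_words=3):
    print(topic_id, terms)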
Example #53
def ae_predict(data):

	json_file = open('model.json', 'r')
	loaded_model_json = json_file.read()
	json_file.close()
	loaded_model = model_from_json(loaded_model_json)
	loaded_model.load_weights("model.h5")

	#----------get output from latent space

	data = normalize(data/256)
	data = data/data.max()
	print("data range", data.min(), data.max())
	layer_name = 'latent_space'
	intermediate_layer_model = Model(inputs=loaded_model.input,
	                                 outputs=loaded_model.get_layer(layer_name).output)
	intermediate_output = intermediate_layer_model.predict(data)

	return intermediate_output
Example #54
def sineramp(sze=[256, 512], amp=12.5, wavelen=8, p=2):
    if len(sze) == 1:
        rows, cols = sze, sze
    elif len(sze) == 2:
        rows, cols = sze
    else:
        print('size must be a 1 or 2 element vector')
    '''Adjust width of image so that we have an integer number of cycles of
    the sinewave.  This helps should one be using the test image to
    evaluate a cyclic colour map.  However you will still see a slight
    cyclic discontinuity at the top of the image, though this will
    disappear at the bottom of the test image'''

    cycles = np.round(cols / wavelen)
    cols = cycles * wavelen

    # Sine wave
    x = np.arange(0, cols - 1)
    fx = amp * np.sin(1 / wavelen * 2 * np.pi * x)

    # Vertical modulating function
    A = (np.arange(rows - 1, 0, -1) / (rows - 1))**p
    #     A = ([(rows-1):-1:0]/(rows-1))**p
    im = A[:, np.newaxis] * fx[np.newaxis, :]
    #     print(im.shape)

    # Add ramp
    ramp, _ = np.meshgrid(np.arange(0, cols - 1), np.arange(1,
                                                            rows)) / (cols - 1)
    #     print(ramp.shape)
    #     ramp = meshgrid(0:(cols-1), 1:rows)/(cols-1)
    im = im + ramp * (255 - 2 * amp)
    #     print(im.shape)

    # Now normalise each row so that it spans the full data range from 0 to 255.
    # This ensures that, at the lower edge of the image, the full colour map is
    # displayed.  It also helps with the evaluation of cyclic colour maps though
    # a small cyclic discontinuity will remain at the top of the test image.
    for r in range(rows - 1):
        im[r, :] = normalize.normalize(im[r, :])
    im = im * 255

    return im
def save_gscale_img(fn, grayimg, mask_erode, mask_dilate):
    mask_erode = mask_erode.astype(float)
    mask_dilate = mask_dilate.astype(float)
    grayimg = grayimg.astype(float)

    # include all pixels 'on' in eroded binary shadow
    # exclude all pixels 'off' in dilated binary shadow
    img = grayimg
    img = np.maximum(img, mask_erode)
    img = np.minimum(img, mask_dilate)
    if do_normalize:
        img = normalize(img, figure_size, outsize)
        out = 1 - img
        out = out * 255
        out = out.astype(int)
    else:
        out = 1 - img
        out = out * 255
        out = out.astype(int)
        out = misc.imresize(out, (outsize, outsize))
    misc.imsave(fn, out)
Example #56
def page2(request):
    if request.method == "POST":
        print(request.POST)
        str1 = "media/documents/"
        str2 = request.FILES['docfile'].name
        if os.path.isfile(str1 + str2):
            os.remove(str1 + str2)
        newdoc = Document(docfile=request.FILES['docfile'])
        newdoc.save()
        request.session['classes'] = request.POST['classes']
        request.session['remove'] = request.POST['remove']
        request.session['docfile'] = request.FILES['docfile'].name
        colonne = range(3)
        data = open_dataset(request.session['docfile'])
        normalizedData = normalize(data)
        stats = getStats(normalizedData)
        return render(request, 'kmeans/page2.html', {
            'colonne': colonne,
            'stats': stats
        })
    else:
        return render(request, 'kmeans/page1.html', {})
Example #57
def inertia_plot():
    from normalize import normalize
    import pandas as pd
    import numpy as np
    from sklearn.cluster import KMeans
    from sklearn.metrics import silhouette_samples, silhouette_score
    import matplotlib.pyplot as plt
    #data_set_1 = pd.read_csv("SongCSV.csv")

    new_data = normalize()
    data = new_data.iloc[:, 18:19]
    temp_1 = 10**8
    x = np.linspace(1, 20, num=20)
    array = []

    for i in range(1, 21):
        kmeans = KMeans(n_clusters=i, max_iter=200, random_state=0,
                        verbose=0).fit(data)
        array.append(kmeans.inertia_)

    plt.plot(x, array, '-x')
    return plt.show()
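
inertia_plot above imports silhouette_score but never uses it; the usual companion to the elbow plot is a silhouette sweep over k. A self-contained sketch on synthetic blobs (not the SongCSV data):

import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score

X, _ = make_blobs(n_samples=500, centers=4, random_state=0)

scores = []
for k in range(2, 11):  # silhouette needs at least 2 clusters
    labels = KMeans(n_clusters=k, random_state=0, n_init=10).fit_predict(X)
    scores.append(silhouette_score(X, labels))

best_k = int(np.argmax(scores)) + 2
print("best k by silhouette:", best_k)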
Example #58
def process_data(filename):
    '''
        Read data from file and prepare for processing
    '''

    # save cwd and change cwd
    saved_cwd = getcwd()
    chdir('datasets')

    # construct array from tab delimited file
    data, labels_list = to_array(filename)

    # create mapping of labels associated with dataset and key to index labels
    labeling = categorize_labels(labels_list)

    # normalize data
    normalizing = normalize(data)

    # restore saved cwd
    chdir(saved_cwd)

    return data, normalizing, labeling
Example #59
def get_image(lung, deterministic):
    lung[lung == -2000] = 0
    #lung = lung - 1024
    truth = np.zeros_like(lung)
    outside = np.where(lung == 0, 1, 0)
    #######################

    outside = np.array(outside, dtype=np.float32)

    truth = np.array(np.round(truth), dtype=np.int64)
    outside = np.array(np.round(outside), dtype=np.int64)

    #Set label of outside pixels to -10
    truth = truth - (outside * 10)

    lung = lung * (1 - outside)
    lung = lung - outside * 3000

    if P.INPUT_SIZE > 0:
        lung = crop_or_pad(lung, INPUT_SIZE, -3000)
        truth = crop_or_pad(truth, OUTPUT_SIZE, 0)
        outside = crop_or_pad(outside, OUTPUT_SIZE, 1)
    else:
        out_size = output_size_for_input(lung.shape[1], P.DEPTH)
        #lung = crop_or_pad(lung, INPUT_SIZE, -1000)
        truth = crop_or_pad(truth, out_size, 0)
        outside = crop_or_pad(outside, out_size, 1)

    lung = normalize.normalize(lung)
    lung = np.expand_dims(np.expand_dims(lung, axis=0), axis=0)

    if P.ZERO_CENTER:
        lung = lung - P.MEAN_PIXEL

    truth = np.array(np.expand_dims(np.expand_dims(truth, axis=0), axis=0),
                     dtype=np.int64)

    return lung, truth
Example #60
def save():
    json_from_client = flask.request.json
    entry = normalize(json_from_client['entry'])
    entry = add_class(entry)

    if json_from_client['edit'] == 'edit':
        index = json_from_client['index']
        try:
            res = es.index(index='temp_dict',
                           doc_type='entry',
                           id=index,
                           body=entry)
            result = {'feedback': 'success', 'id': res['_id']}
        except:
            result = {'feedback': 'failure'}
    else:
        try:
            res = es.index(index='temp_dict', doc_type='entry', body=entry)
            result = {'feedback': 'success', 'id': res['_id']}
        except:
            result = {'feedback': 'failure'}

    return flask.jsonify(result=result)