Esempio n. 1
0
    def init_wordprocessers(self):
        """
        Set up the stemmer and the spelling corrector for the chosen language.

        Reads the current value of ``self.language``; 'English' selects the
        English Snowball stemmer, any other value falls back to German.
        The stemmer is stored on ``self.stemmer`` and the corrector on
        ``self.corr``.

        Returns:
            str: The language code that was configured, 'en' or 'de'.
        """
        if self.language.get() != 'English':
            code = 'de'
            self.stemmer = stem.snowball.GermanStemmer()
        else:
            code = 'en'
            self.stemmer = stem.snowball.EnglishStemmer()

        # The corrector is built after the stemmer, from the same code.
        self.corr = correct(code)
        return code
Esempio n. 2
0
def dispatch(values=None):
    """
    Validate a request dictionary and route it to the operation it names.

    Args:
        values (dict): Request parameters. The 'op' key selects the
            operation: 'adjust', 'predict', 'correct' or 'locate'.

    Returns:
        dict: The handler's result, or a dictionary carrying an 'error' key
        when validation fails. When ``values`` is a dict it is modified in
        place (an error message is added to it, or an incoming 'error' key
        is removed) and the same object is returned.
    """
    # Identity test: `== None` would call a custom __eq__ and could report
    # an arbitrary object as a missing parameter.
    if values is None:
        return {'error': 'parameter is missing'}
    if not isinstance(values, dict):
        return {'error': 'parameter is not a dictionary'}
    if 'error' in values:
        # An incoming 'error' key is removed and the request is handed back
        # without being dispatched.
        values.pop('error')
        return values
    if 'op' not in values or values['op'] == '':
        values['error'] = 'no op  is specified'
        return values

    # Perform designated function
    op = values['op']
    if op == 'adjust':
        return adjust.adjust(values)
    if op == 'predict':
        return predict.predict(values)
    if op == 'correct':
        return correct.correct(values)
    if op == 'locate':
        return locate.locate(values)

    values['error'] = 'op is not a legal operation'
    return values
Esempio n. 3
0
    def Spellcheck(self, event):
        """
        Spell-check the word that ends at the insertion cursor.

        The word is looked up in ``self._words`` through ``BKTree.query``
        with a tolerance of 2. With no candidates the word is tagged
        "misspelled". When the first candidate's second field is non-zero
        (presumably an edit distance -- confirm against BK_TREE) the word is
        replaced in the widget by the output of ``correct.correct``, with
        leading and trailing punctuation restored.

        Args:
            event: The triggering event; not used.
        """
        # Find the whitespace character that precedes the word being typed.
        boundary = self.text.search(r'\s', "insert", backwards=True, regexp=True)
        if boundary == "":
            index = "1.0"
        else:
            index = self.text.index("%s+1c" % boundary)

        # Last word typed, and the widget index where it ends.
        word = self.text.get(index, "insert")
        word_end = "%s+%dc" % (index, len(word))

        print(word)
        bare_word = word.strip('.,;\"!:?\'()').lower()
        words = BK_TREE.BKTree.query(self._words, bare_word, 2)
        print(words)

        # Nothing close enough in the tree: only flag the word.
        if len(words) == 0:
            self.text.tag_add("misspelled", index, word_end)
            return

        # First candidate reports 0: leave the text untouched.
        if words[0][1] == 0:
            return

        # The replacement comes from correct() applied to the typed word and
        # the candidate list returned by the query.
        new_word = correct.correct(word, words)

        # Put surrounding punctuation back around the replacement.
        if word != '':
            start, end = word[0], word[-1]
            if start in ("\'", "(", "{", "[", "$", '\"', "*"):
                new_word = start + new_word
            if end in ("\'", ")", "]", ".", ",", "\"", ";", ":", "?", "!", "*"):
                new_word = new_word + end

        self.text.delete(index, word_end)
        self.text.insert(index, new_word)
Esempio n. 4
0
def mutate(solution: Solution):
    """
    Randomly perturb the paths of ``solution`` in place and return it.

    Each path is mutated with probability ``MUTATION_PROBABILITY``:

    * more than 3 points: one interior point is moved by one unit along a
      random axis and direction, and with probability 1/2 the following
      point is moved by the same offset;
    * otherwise: a new point is inserted after the first point, at an
      offset derived from the step between the first two points.

    Every path, mutated or not, then has its points passed through
    ``correct``.
    """
    for path in solution.paths:
        mutated = random.random() < MUTATION_PROBABILITY

        if mutated and len(path.points) > 3:
            pivot = random.randint(1, len(path.points) - 3)
            along_x = random.randint(0, 1)
            sign = 1 - 2 * random.randint(0, 1)
            dx, dy = sign * along_x, sign * (1 - along_x)
            path.points[pivot].move(dx, dy)
            if random.randint(0, 1) == 0:
                path.points[pivot + 1].move(dx, dy)
        elif mutated:
            step = path.points[1] - path.points[0]
            if step.x == 0 and abs(step.y) > 1:
                # Vertical step: shift sideways by one and shorten it.
                step.x = 1 - 2 * random.randint(0, 1)
                step.y = math.copysign(1, step.y) * random.randint(
                    1, abs(step.y))
            elif step.y == 0 and abs(step.x) > 1:
                # Horizontal step: shift up/down by one and shorten it.
                step.y = 1 - 2 * random.randint(0, 1)
                step.x = math.copysign(1, step.x) * random.randint(
                    1, abs(step.x))
            path.points.insert(1, path.points[0] + step)

        path.points = correct(path.points)
    return solution
Esempio n. 5
0
def generate_path(board: Board, start: P2D, end: P2D) -> Path:
    """
    Build a path from ``start`` to ``end`` through random intermediate points.

    The point list is ``start``, then whatever ``generate_random_points``
    yields for ``board``, then ``end``; it is passed through ``correct``
    before being stored on the returned path.
    """
    path = Path(start, end)
    waypoints = [start, *generate_random_points(board), end]
    path.points = correct(waypoints)
    return path
Esempio n. 6
0
def get_text(img,
             classifier,
             bg_thresh=None,
             resize=None,
             min_char_dist=0,
             min_char_pixels=1,
             min_line_dist=0,
             min_line_pixels=1,
             spell_check=False,
             **kwargs):
    """
    Extract the text of an image using a classifier.

    Line regions are detected first, then character regions inside them,
    and finally the regions are turned into text.

    Args:
        img: Image to read; passed to every helper.
        classifier: Passed to get_text_from_regions().
        bg_thresh: Passed to line_regions() and character_regions().
            Defaults to None.
        resize: Passed to get_text_from_regions(). Defaults to None.
        min_char_dist (Optional[int]): Minimum distance between each
            character in character regions. Defaults to 0.
        min_char_pixels (Optional[int]): Minimum number of pixels along a
            column in a line region to be considered as containing text.
            Defaults to 1.
        min_line_dist (Optional[int]): Minimum distance between each line
            in line regions. Defaults to 0.
        min_line_pixels (Optional[int]): Minimum number of pixels along
            the height of an image to be considered as containing text.
            Defaults to 1.
        spell_check (Optional[bool]): Use spell check if True.
            Defaults to False.
        **kwargs: Passed to get_text_from_regions().

    Returns:
        str: The string extracted from the image. With spell_check the
        text is split on whitespace, each word is passed through
        correct(), and the words are re-joined with single spaces.
    """
    lines = line_regions(img,
                         bg_thresh=bg_thresh,
                         min_dist=min_line_dist,
                         min_pixels=min_line_pixels)
    chars = character_regions(img,
                              lines,
                              bg_thresh=bg_thresh,
                              min_dist=min_char_dist,
                              min_pixels=min_char_pixels)
    extracted = get_text_from_regions(img,
                                      lines,
                                      chars,
                                      classifier,
                                      resize=resize,
                                      **kwargs)

    if not spell_check:
        return extracted

    corrected_words = [correct(token) for token in extracted.split()]
    return " ".join(corrected_words)
Esempio n. 7
0
def main():
    """
    Demo: classify each token of a sample sentence and log the outcome.

    Tokens flagged by ``judge_word`` are logged as words; for the others a
    suggestion from ``correct`` is logged as well.
    """
    test = "agc would is going to famaly 2 3 verygood u know 实现 , ."
    tokens, flag_list = judge_word(test)

    for position, token in enumerate(tokens):
        if not flag_list[position]:
            message = "{} is not a word, may be {}".format(token, correct(token))
        else:
            message = "{} is a word".format(token)
        logging.info(message)
Esempio n. 8
0
    def makeTag(self, field, value):
        """
        Convert one source field/value pair into a single-entry tag dict.

        The field name is mapped through ``self.ctable.match`` and the value
        through ``self.ctable.attribute``; name and address tags then get
        extra clean-up.

        Args:
            field: Field name from the source data.
            value: Field value; converted with ``str()``.

        Returns:
            dict: ``{tag_name: tag_value}`` with exactly one entry.

        Side effects:
            May set ``self.num``; always resets ``self.full`` and
            ``self.addr`` to None before returning.
        """
        fix = correct.correct()
        newval = str(value)
        #newval = html.unescape(newval)
        # Replace '&' with 'and' and drop double quotes from the value.
        newval = newval.replace('&', 'and')
        newval = newval.replace('"', '')
        #newval = newval.replace('><', '')
        tag = dict()
        # logging.debug("OSM:makeTag(field=%r, value=%r)" % (field, newval))

        try:
            newtag = self.ctable.match(field)
        except Exception as inst:
            logging.warning("MISSING Field: %r, %r" % (field, newval))
            # If it's not in the conversion file, assume it maps directly
            # to an official OSM tag.
            newtag = field


        newval = self.ctable.attribute(newtag, newval)
        #logging.debug("ATTRS1: %r %r" % (newtag, newval))
        # A result of the form "tag=value" overrides both the tag name and
        # the value. Only the first two '='-separated parts are used.
        change = newval.split('=')
        if len(change) > 1:
            newtag = change[0]
            newval = change[1]

        # name tags, usually roads or addresses, often have to be tweaked
        # for OSM standards
        if (newtag == "name") or (newtag == "alt_name"):
            newval = string.capwords(fix.alphaNumeric(newval))
            newval = fix.abbreviation(newval)
            newval = fix.compass(newval)

        # This is a hack because the CO address data truncates the street,
        # and we need the whole thing so routing will work to an address.
        if newtag == 'addr:full':
            # Drop a trailing " Unit ..." suffix, then strip the leading
            # house number to leave the street name.
            self.full = re.sub(" Unit .*", '', newval)
            newval = re.sub("^[0-9]* ", '', self.full)
            # NOTE(review): "add:street" looks like a typo for
            # "addr:street" -- confirm before changing, output depends on it.
            newtag = "add:street"
            # logging.debug("FIXME: FULL %" % self.full)
        elif newtag == 'addr:housenumber':
            # logging.debug("FIXME: NUM")
            self.num = newval
        elif newtag == 'addr:street':
            # NOTE(review): self.full is reset to None at the end of every
            # call, so this branch only fires if something outside this
            # method sets self.full between calls -- confirm intent.
            if self.full is not None:
                newval = re.sub("^[0-9]* ", '', self.full)
                # newval = self.full.replace(self.num, '')

        self.full = None
        self.addr = None
        tag[newtag] = newval
        # tag[newtag] = string.capwords(newval)

        #print("ATTRS2: %r %r" % (newtag, newval))
        return tag
Esempio n. 9
0
def checker(ans, inp):
	"""
	Compare an answer with the spell-corrected form of the input.

	``inp`` is passed to ``correct.correct`` with the language code "ru".
	The check succeeds when ``ans`` equals the corrected text. If the
	correction changed the input, the message carries the corrected
	spelling (the Russian prefix means "more precisely").

	Returns:
		dict: ``{"result": bool, "msg": str}``.
	"""
	corr = correct.correct(inp, "ru")
	matched = bool(ans == corr)
	hint = ""
	# Only show the hint when the answer matched and the input was altered.
	if matched and corr != inp:
		hint = "(точнее: " + corr + ")"
	return {"result": matched, "msg": hint}
def word_stem_stop_word(reply_text,num):
	"""
	Tokenise a reply, drop stop words, and spell-correct and stem the rest.

	Stop words are read from 'stopwords.txt' in the current directory, one
	per line. Tokens tagged NNP/NNPS are collected as proper nouns and kept
	unstemmed; every other token is corrected, stemmed with ``porter`` and
	corrected again. All kept tokens are lower-cased.

	Args:
		reply_text: Text to process.
		num: When truthy, the proper nouns are returned as well.

	Returns:
		The processed tokens joined by single spaces, or a
		``(text, proper_nouns)`` tuple when ``num`` is truthy.
	"""
	# Close the file deterministically. splitlines() copes with both
	# '\n' and '\r\n' endings, where splitting on a literal '\r\n' left a
	# '\n'-terminated file as a single entry. A set gives O(1) membership
	# tests in the loop below.
	with open('stopwords.txt') as stopwordsfile:
		stopwords = set(stopwordsfile.read().splitlines())

	tagged_words = nltk.pos_tag(nltk.word_tokenize(reply_text))
	reply = []
	# `Set` is kept as in the original so the returned type is unchanged
	# (presumably sets.Set from Python 2 -- confirm against file imports).
	proper_nouns = Set([])
	for word, tag in tagged_words:
		if str(word.lower()) in stopwords:
			continue
		if tag in ('NNP', 'NNPS'):
			proper_nouns.add(word.lower())
		else:
			# Correct before and after stemming.
			word = correct.correct(word)
			word = porter.stem(word)
			word = correct.correct(word)
		reply.append(word.lower())

	reply = ' '.join(reply)
	if num:
		return reply,proper_nouns
	return reply
Esempio n. 11
0
def correct_route():
    """
    Handle a correction request and answer with JSON.

    Reads the 'query' request argument, passes it to ``correct`` and
    returns ``{"corrected": <text>}``; a ``None`` result from ``correct``
    is reported as an empty string. The request arguments, the query and
    the result are printed for debugging.
    """
    print(request.args)
    query = request.args.get('query')
    print("[Router] correct: {}".format(query))

    suggestion = correct(query)
    result = {
        'corrected': '' if suggestion is None else suggestion,
    }
    print(result)
    return jsonify(result)
Esempio n. 12
0
def get_text(img, classifier, bg_thresh=None, resize=None, min_char_dist=0,
             min_char_pixels=1, min_line_dist=0, min_line_pixels=1,
             spell_check=False, **kwargs):
    """
    Read the text of an image with a classifier.

    Args:
        img: Image to read; handed to every helper.
        classifier: Handed to get_text_from_regions().
        bg_thresh: Handed to line_regions() and character_regions().
            Defaults to None.
        resize: Handed to get_text_from_regions(). Defaults to None.
        min_char_dist (Optional[int]): Minimum distance between each
            character in character regions. Defaults to 0.
        min_char_pixels (Optional[int]): Minimum number of pixels along a
            column in a line region to be considered as containing text.
            Defaults to 1.
        min_line_dist (Optional[int]): Minimum distance between each line
            in line regions. Defaults to 0.
        min_line_pixels (Optional[int]): Minimum number of pixels along
            the height of an image to be considered as containing text.
            Defaults to 1.
        spell_check (Optional[bool]): Use spell check if True.
            Defaults to False.
        **kwargs: Handed to get_text_from_regions().

    Returns:
        str: The string extracted from the image. When spell_check is set,
        the whitespace-separated words are each run through correct() and
        joined back with single spaces.
    """
    # Lines first, then the characters inside each line.
    rows = line_regions(
        img, bg_thresh=bg_thresh, min_dist=min_line_dist,
        min_pixels=min_line_pixels)
    glyphs = character_regions(
        img, rows, bg_thresh=bg_thresh, min_dist=min_char_dist,
        min_pixels=min_char_pixels)
    raw_text = get_text_from_regions(
        img, rows, glyphs, classifier, resize=resize, **kwargs)

    return " ".join(map(correct, raw_text.split())) if spell_check else raw_text
Esempio n. 13
0
def dispatch(values=None):
    """
    Validate a request dictionary and route it to the operation it names.

    Args:
        values (dict): Request parameters. The 'op' key selects the
            operation: 'adjust', 'predict', 'correct' or 'locate'.

    Returns:
        dict: The handler's result. For 'locate' the input dictionary is
        returned unchanged (stub). On a validation failure a dictionary
        with an 'error' key is returned; when ``values`` is a dict the
        message is written into it and the same object is returned.
    """
    # Validate parm
    # Identity test: `== None` would call a custom __eq__ and could report
    # an arbitrary object as a missing parameter.
    if values is None:
        return {'error': 'parameter is missing'}
    if not isinstance(values, dict):
        return {'error': 'parameter is not a dictionary'}
    if 'op' not in values:
        values['error'] = 'no op is specified'
        return values

    # Perform designated function
    op = values['op']
    if op == 'adjust':
        return adjust.adjust(values)
    if op == 'predict':
        return predict.predict(values)    #This calculation is stubbed out
    if op == 'correct':
        return correct.correct(values)    #This calculation is stubbed out
    if op == 'locate':
        return values    #This calculation is stubbed out

    values['error'] = 'op is not a legal operation'
    return values
Esempio n. 14
0
def searchByCourse(requset):
    """
    Return the books related to a course as an XML HTTP response.

    The course name comes from the "course" GET parameter and is passed
    through ``correct.correct``. Lookup order: cache, then the Courses
    table, then a fresh search whose results are stored in the database
    and cached for 24 hours.

    Args:
        requset: The incoming HTTP request (parameter name kept as spelled
            in the original signature).

    Returns:
        HttpResponse: XML on success, or a plain-text message when the
        parameter is missing or no related book is found.
    """
    course = requset.GET.get("course", "")
    if course == "":
        return HttpResponse("Request error")
    print type(course)
    course = correct.correct(course)
    print type(course)
    xml = cache.get("C_" + course)
    # Serve straight from the cache when an entry exists.
    if xml:
        return HttpResponse(xml, content_type="application/xml")
    try:
        # Check whether this course is already in the database.
        c = Courses.objects.get(cname=course)
        # Return the stored result and refresh the cache (24 h).
        xml = getExistCourseRecord(c)
        cache.set("C_" + course, xml, 60 * 60 * 24)
        return HttpResponse(xml, content_type="application/xml")
    except Courses.DoesNotExist:
        # This course has not been searched before:
        # search for it and store the result in the database.
        cr = CourseReptile()
        t1 = time.time()
        booksNames = cr.course_search(course)
        print "search books by course cost : " + repr(time.time() - t1) + "s"
        # No book correlated with this course.
        if not len(booksNames):
            return HttpResponse("No relative book for this course!")
        c = Courses.objects.create(cname=course, description="")
        c.save()
        # Score each book name by its Levenshtein similarity to the course.
        similarNames = []
        for bookName in booksNames:
            p = Levenshtein.ratio(bookName, course)
            similarNames.append((bookName, p))
        # Sort the book names, most similar first.
        booksNames = sorted(similarNames, key=lambda x: x[1], reverse=True)
        print booksNames
        xml = ""
        for bookName, p in booksNames:
            # To be implemented: this operation should return a list of
            # dictionaries.
            sola = solaSpider()
            t1 = time.time()
            books = sola.getBookList(bookName, True)
            print "search books by book cost : " + repr(time.time() - t1) + "s"
            # Persist every book and build up the response XML.
            for book in books:
                print book
                # book is a dictionary
                bookid = storeBookItem(book)
                # Append this book's XML fragment to the response.
                xml += getBookItemXml(bookid)
                # Link the new course to the related book.
                r = Relation.objects.create(course=c, bid=bookid, click=0)
                r.save()
        if xml == "":
            return HttpResponse("No relative book for this course!")
        xml = packXml(xml, c.id, "course")
        # Write the final document to the cache (24 h).
        cache.set("C_" + course, xml, 60 * 60 * 24)
        return HttpResponse(xml, content_type="application/xml")
Esempio n. 15
0
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#

import logging
import html
import string
import pdb
import re
import correct
import inspect
import dejagnu

# Test harness object used to record each check below.
dj = dejagnu.dejagnu()

# Object under test.
obj = correct.correct()
dj.verbose_level(2)

# Test the compass corrections, i.e. 'N RoadName' becomes 'North RoadName'.
instr = "N 126"
x = obj.compass(instr)
dj.matches(x, 'North 126', "correct.compass(North)")

instr = "S 126"
x = obj.compass(instr)
dj.matches(x, 'South 126', "correct.compass(South)")

instr = "E 126"
x = obj.compass(instr)
dj.matches(x, 'East 126', "correct.compass(East)")
Esempio n. 16
0
def to_graph(son):
    """
    From flat json to python graph.

    Parses ``son`` (a JSON array of category objects), normalises each
    entry with ``fixrelations`` and ``correct``, and nests every dependent
    entry under the chain of parents named in its ``strings.based`` field
    (a comma-separated list). Entries whose ``strings.based`` equals
    ``strings.independend`` are the roots.

    Args:
        son (str): JSON text.

    Returns:
        str: Indented JSON dump of the list of root entries, with the
        dependent entries attached under ``strings.children``.

    Raises:
        KeyError: When the first base of an entry is not a root.
        Exception: When the remaining entries cannot be placed after
            repeated passes over the queue.
    """

    def dumps(item):
        return json.dumps(item, indent=4)

    def printjson(item):
        print(dumps(item))

    categories = json.loads(son)
    categories = [correct(fixrelations(i)) for i in categories]
    independents = filter(
        lambda x: x[strings.based] == strings.independend,
        categories
    )

    def listToDict(keyFunction, values):
        return dict((keyFunction(v), v) for v in values)

    # A list is just a great way to waste time for this usecase
    # (roots are looked up by name, so index them in a dict).
    independents = listToDict(lambda x: x["Name"], independents)

    def addChildTo(parent, child):
        parent[strings.children].append(child)
        return parent

    # Recursively find the parents. True on success, False on failure.
    # If successful the child will be added to the found parents.
    # `bases` is the list of ancestor names still to resolve; `parents`
    # is mutated in place while descending.
    def findparents(child, bases, parents):
        if len(bases) == 0:
            # Every base resolved: attach the child to each collected parent.
            for p in parents:
                addChildTo(p, child)
            return True
        current = bases[0]
        if len(parents) == 0:
            # The first base must be a root; dump the entry before failing.
            try:
                parents.insert(0, independents[current])
            except KeyError as e:
                printjson(child)
                raise e
            return findparents(child, bases[1:], parents)
        # Look for the next base among the children of the current parent.
        base = next(
            (x
             for x
             in parents[0][strings.children]
             if x[strings.name] == current),
            None
        )
        if base is None:
            if current not in independents:
                if current == child[strings.name]:
                    # ubuntu dependson ubuntu... yes distrowatch that's just
                    # silly
                    return findparents(child, [], parents)
                # the base is not added yet to the structure
                # lets just ignore this one for now.
                return False
            # The base is itself a root: add it as another parent.
            parents.insert(0, independents[current])
            return findparents(child, bases[1:], parents)
        # Descend: the found base replaces the current head parent.
        parents[0] = base
        return findparents(child, bases[1:], parents)

    def deepen(collection):
        # Process the deque front to back; entries that cannot be placed
        # yet are re-queued at the back and retried later.
        counter = 0
        while len(collection) > 0:
            current = collection[0]
            basedstr = current[strings.based]
            bases = basedstr.split(",")
            if not findparents(current, bases, []):
                counter += 1
                # NOTE(review): the limit is 10x the queue length although
                # the message says "five full circles", and %i is filled
                # with the failure counter, not the deque size -- confirm.
                if counter > len(collection) * 10:
                    printjson(list(collection))
                    raise Exception(
                        "Made five full circles in the deque, the data is " +
                        "just invalid, deque size %i " % counter
                    )
                collection.append(current)
            else:
                # Progress was made, so reset the failure counter.
                counter = 0
            collection.popleft()
        return collection

    from collections import deque
    notindependents = deque(filter(
        lambda x: not x[strings.based] == strings.independend,
        categories
    ))
    deepen(notindependents)
    # Only the root entries are dumped; dependents hang below them.
    return dumps(list(map(lambda item: item[1], independents.items())))
Esempio n. 17
0
def process_image(filename):
    """
    Copy ``filename`` into the configured temporary directory, then run
    ``correct.correct`` on the original path.
    """
    destination = config.tmpDir
    shutil.copy(filename, destination)
    correct.correct(filename)
Esempio n. 18
0
def _load_pickle(path):
    """Return the object pickled at ``path``, closing the file afterwards."""
    # NOTE(review): unpickling can execute arbitrary code; only point this
    # at trusted configuration folders.
    with open(path, 'rb') as handle:
        return pkl.load(handle)


def _read_trainer_name(config_folder):
    """Return the content of the 'which_trainer' file in ``config_folder``."""
    with open(os.path.join(config_folder, 'which_trainer'), 'r') as ff:
        return ff.read()


def generate_midi(config_folder_fd, config_folder_bd, config_folder_corr,
                  score_source, save_folder, initialization_type,
                  number_of_version, duration_gen, num_pass_correct,
                  logger_generate):
    """This function generate the orchestration of a midi piano score

    Three models are chained: a forward pass, a backward pass seeded with
    the end of the forward result, and ``num_pass_correct`` correction
    passes. Each stage writes its result under
    ``save_folder/fd_bd_corr_<initialization_type>_init/<track>`` with the
    prefixes 'fd_', 'bd_', 'corr_<pass>_' and finally 'final_'.

    Parameters
    ----------
    config_folder_fd : str
        Configuration folder of the forward model. Must contain
        script_parameters.pkl, model_params.pkl, which_trainer and
        normalizer.pkl. Its script parameters drive the whole run.
    config_folder_bd : str
        Configuration folder of the backward model (script_parameters.pkl,
        model_params.pkl, which_trainer).
    config_folder_corr : str
        Configuration folder of the correction model (same three files).
    score_source : str
        Either a path to a folder containing two midi files (piano and
        orchestration) or the path to a piano midi file (ending in 'mid').
    save_folder : str
        Root folder under which the generated files are written.
    initialization_type : str
        How the orchestra seed is built: "seed", "zeros", "constant" or
        "random".
    number_of_version : int
        Number of version generated in a batch manner. Since the generation
        process involves sampling it might be interesting to generate
        several versions
    duration_gen : int
        Length of the generated score (in number of events). Useful for
        generating only the beginning of the piece.
    num_pass_correct : int
        Number of correction passes applied after the backward pass.
    logger_generate : logger
        Instanciation of logging. Can be None, in which case this module's
        logger is used.

    Returns
    -------
    None
        Results are written to disk. Returns early, writing nothing, when
        the track is shorter than ``duration_gen``.

    Raises
    ------
    ValueError
        If ``initialization_type`` is not one of the supported values.
    AssertionError
        If the three configurations disagree on a shared setting.
    """
    # The docstring allows a None logger, so fall back to a module logger
    # instead of failing on the first .info() call.
    if logger_generate is None:
        import logging
        logger_generate = logging.getLogger(__name__)

    logger_generate.info("#############################################")
    logger_generate.info("Orchestrating : " + score_source)

    # Load parameters
    parameters = _load_pickle(config_folder_fd + '/script_parameters.pkl')
    model_parameters_fd = _load_pickle(config_folder_fd + '/model_params.pkl')
    #
    parameters_bd = _load_pickle(config_folder_bd + '/script_parameters.pkl')
    model_parameters_bd = _load_pickle(config_folder_bd + '/model_params.pkl')
    #
    parameters_corr = _load_pickle(
        config_folder_corr + '/script_parameters.pkl')
    model_parameters_corr = _load_pickle(
        config_folder_corr + '/model_params.pkl')

    # The three models must agree on everything that shapes the data.
    assert (model_parameters_fd["temporal_order"]
            == model_parameters_bd["temporal_order"]) and (
                model_parameters_fd["temporal_order"]
                == model_parameters_corr["temporal_order"]
            ), "The two model have different seed_size"
    for key in ("quantization", "temporal_granularity", "instru_mapping",
                "normalizer"):
        assert (parameters[key] == parameters_bd[key]) and (
            parameters[key] == parameters_corr[key]
        ), "The two model have different " + key

    seed_size = max(model_parameters_fd['temporal_order'], 10) - 1

    #######################
    # Load data
    if re.search(r'mid$', score_source):
        loaded = generation_utils.load_solo(
            score_source, parameters["quantization"],
            parameters["binarize_piano"], parameters["temporal_granularity"])
    else:
        # Piano and orchestra are aligned only when the orchestra is used
        # as a seed.
        loaded = generation_utils.load_from_pair(
            score_source,
            parameters["quantization"],
            parameters["binarize_piano"],
            parameters["binarize_orch"],
            parameters["temporal_granularity"],
            align_bool=(initialization_type == "seed"))
    (pr_piano, event_piano, duration_piano, name_piano, pr_orch, instru_orch,
     duration) = loaded

    if (duration is None) or (duration < duration_gen):
        logger_generate.info("Track too short to be used")
        return
    ########################

    ########################
    # Shorten
    # Keep only the beginning of the pieces (let's say a 100 events)
    pr_piano = pianoroll_processing.extract_pianoroll_part(
        pr_piano, 0, duration_gen)
    if parameters["duration_piano"]:
        duration_piano = np.asarray(duration_piano[:duration_gen])
    else:
        duration_piano = None
    if parameters["temporal_granularity"] == "event_level":
        event_piano = event_piano[:duration_gen]
    pr_orch = pianoroll_processing.extract_pianoroll_part(
        pr_orch, 0, duration_gen)
    ########################

    ########################
    # Instanciate piano pianoroll
    N_piano = parameters["instru_mapping"]['Piano']['index_max']
    pr_piano_gen = np.zeros((duration_gen, N_piano), dtype=np.float32)
    pr_piano_gen = build_data_aux.cast_small_pr_into_big_pr(
        pr_piano, {}, 0, duration_gen, parameters["instru_mapping"],
        pr_piano_gen)
    pr_piano_gen_flat = pr_piano_gen.sum(axis=1)
    # Indices of the events where no piano note is active.
    silence_piano = [
        e for e in range(duration_gen) if pr_piano_gen_flat[e] == 0
    ]
    ########################

    ########################
    # Initialize orchestra pianoroll with orchestra seed (choose one)
    N_orchestra = parameters['N_orchestra']
    pr_orchestra_truth = np.zeros((duration_gen, N_orchestra),
                                  dtype=np.float32)
    pr_orchestra_truth = build_data_aux.cast_small_pr_into_big_pr(
        pr_orch, instru_orch, 0, duration_gen, parameters["instru_mapping"],
        pr_orchestra_truth)
    if initialization_type == "seed":
        pr_orchestra_seed = generation_utils.init_with_seed(
            pr_orch, number_of_version, seed_size, N_orchestra, instru_orch,
            parameters["instru_mapping"])
    elif initialization_type == "zeros":
        pr_orchestra_seed = generation_utils.init_with_zeros(
            number_of_version, seed_size, N_orchestra)
    elif initialization_type == "constant":
        const_value = 0.1
        pr_orchestra_seed = generation_utils.init_with_constant(
            number_of_version, seed_size, N_orchestra, const_value)
    elif initialization_type == "random":
        proba_activation = 0.01
        pr_orchestra_seed = generation_utils.init_with_random(
            number_of_version, seed_size, N_orchestra, proba_activation)
    else:
        # Previously an unknown value left the seed undefined and failed
        # later with an unbound-variable error.
        raise ValueError(
            "Unknown initialization_type: {!r}".format(initialization_type))
    ########################

    #######################################
    # Embed piano
    if parameters['embedded_piano']:
        # Load model
        embedding_model = torch.load(parameters["embedding_path"],
                                     map_location="cpu")

        # Build embedding (no need to batch here, len(pr_piano_gen) is
        # sufficiently small). No CUDA here either: mixing with TF is
        # risky and the piano chunks may be very long.
        piano_mapping = parameters["instru_mapping"]['Piano']
        # Embeddings accept 128 samples
        piano_resize_emb = np.zeros((len(pr_piano_gen), 1, 128))
        piano_resize_emb[:, 0, piano_mapping['pitch_min']:
                         piano_mapping['pitch_max']] = pr_piano_gen
        piano_resize_emb_TT = torch.tensor(piano_resize_emb)
        piano_embedded_TT = embedding_model(piano_resize_emb_TT.float(), 0)
        pr_piano_gen_embedded = piano_embedded_TT.numpy()
    else:
        pr_piano_gen_embedded = pr_piano_gen
    #######################################

    ########################
    # Inputs' normalization
    normalizer = _load_pickle(os.path.join(config_folder_fd, 'normalizer.pkl'))
    if parameters["embedded_piano"]:  # When using embedding, no normalization
        pr_piano_gen_norm = pr_piano_gen_embedded
    else:
        pr_piano_gen_norm = normalizer.transform(pr_piano_gen_embedded)
    ########################

    ########################
    # Store folder
    file_name = re.split(r'/', name_piano)[-1]
    name_track = re.sub('piano_solo.mid', '', file_name)
    generated_folder = save_folder + '/fd_bd_corr_' + initialization_type + '_init/' + name_track
    os.makedirs(generated_folder, exist_ok=True)
    ########################

    ########################
    # Get trainers. All three are built with the forward script parameters.
    trainer_fd = import_trainer(_read_trainer_name(config_folder_fd),
                                model_parameters_fd, parameters)
    trainer_bd = import_trainer(_read_trainer_name(config_folder_bd),
                                model_parameters_bd, parameters)
    trainer_corr = import_trainer(_read_trainer_name(config_folder_corr),
                                  model_parameters_corr, parameters)
    ########################

    ############################################################
    # Generate
    ############################################################
    time_generate_0 = time.time()
    model_path = 'model_accuracy'
    # Forward
    pr_orchestra_gen = generate(trainer_fd,
                                pr_piano_gen_norm,
                                silence_piano,
                                duration_piano,
                                config_folder_fd,
                                model_path,
                                pr_orchestra_seed,
                                batch_size=number_of_version)
    prefix_name = 'fd_'
    generation_utils.reconstruct_generation(pr_orchestra_gen, event_piano,
                                            generated_folder, prefix_name,
                                            parameters, seed_size)
    # Backward, seeded with the last seed_size frames of the forward result
    pr_orchestra_seed = pr_orchestra_gen[:, -seed_size:]
    pr_orchestra_gen = generate_backward(trainer_bd,
                                         pr_piano_gen_norm,
                                         silence_piano,
                                         duration_piano,
                                         config_folder_bd,
                                         model_path,
                                         pr_orchestra_seed,
                                         batch_size=number_of_version)
    prefix_name = 'bd_'
    generation_utils.reconstruct_generation(pr_orchestra_gen, event_piano,
                                            generated_folder, prefix_name,
                                            parameters, seed_size)
    # Correction
    for pass_index in range(num_pass_correct):
        pr_orchestra_gen = correct(trainer_corr,
                                   pr_piano_gen_norm,
                                   silence_piano,
                                   duration_piano,
                                   config_folder_corr,
                                   model_path,
                                   pr_orchestra_gen,
                                   batch_size=number_of_version)
        # Set the prefix BEFORE writing. It used to be assigned after the
        # write, so pass 0 overwrote the 'bd_' files and each later pass
        # was saved under the previous pass's name.
        prefix_name = 'corr_' + str(pass_index) + '_'
        generation_utils.reconstruct_generation(pr_orchestra_gen, event_piano,
                                                generated_folder, prefix_name,
                                                parameters, seed_size)
    time_generate_1 = time.time()
    logger_generate.info(
        'TTT : Generating data took {} seconds'.format(time_generate_1 -
                                                       time_generate_0))

    ############################################################
    # Reconstruct and write
    ############################################################
    prefix_name = 'final_'
    generation_utils.reconstruct_generation(pr_orchestra_gen, event_piano,
                                            generated_folder, prefix_name,
                                            parameters, seed_size)
    generation_utils.reconstruct_original(pr_piano_gen, pr_orchestra_truth,
                                          event_piano, generated_folder,
                                          parameters)
    return
Esempio n. 19
0
def test_correct_simple():
    """Check that correct() restores single words with digit-for-letter swaps."""
    # Each pair is (garbled input, expected restored text).
    cases = (
        ("L0ND0N", "LONDON"),
        ("DUBL1N", "DUBLIN"),
        ("51NGAP0RE", "SINGAPORE"),
        ("BUDAPE5T", "BUDAPEST"),
        ("PAR15", "PARIS"),
    )
    for garbled, expected in cases:
        assert correct(garbled) == expected
Esempio n. 20
0
        return self.db.query(addr)


#epdb.set_trace()
# Script body: load the configuration, connect the call plotter, start the KML
# output, then walk the input file building one address lookup query per line.
dd = config(argv)

fcall = plotcalls(dd)
fcall.connect()

# Open the KML output named by the 'outfile' setting and write its header.
kml = kmlfile()
kml.open(dd.get('outfile'))
kml.header("TLFPD Calls")

# NOTE(review): no close() for this handle is visible in this part of the
# script; the whole file is read into memory up front.
calldata = open(dd.get('infile'), 'r')
lines = calldata.readlines()
fix = correct.correct()
for line in lines:
    # Skips lines whose SECOND character is '#'.
    # NOTE(review): a comment marker would usually be at index 0 - confirm.
    # A line shorter than two characters (e.g. a bare "\n") raises IndexError.
    if line[1] == '#':
        continue
    # Everything before the first space is taken as the house number.
    index = line.find(' ')
    number = line[:index]
    # Skips the space plus one further character before the street text -
    # presumably a two-character separator; verify against the input format.
    street = line[index + 2:]
    street = street.replace("\n", '')
    street = street.strip()
    # Normalise the street name through the three correction passes, in order.
    street = fix.alphaNumeric(street)
    street = fix.abbreviation(street)
    street = fix.compass(street)

    # Build the lookup returning ST_AsKML(way) for the matching house number
    # and street.
    # NOTE(review): both values are concatenated straight into the SQL text, so
    # a quote character in the input breaks the statement; prefer a
    # parameterised query if the database layer offers one.
    query = "SELECT ST_AsKML(way) from planet_osm_point"
    query += " WHERE \"addr:housenumber\"='" + number + "'"
    query += " AND tags->'addr:street'='" + street + "';"
Esempio n. 21
0
def searchByCourse(requset):
    """Return an XML listing of the books related to the requested course.

    The ``course`` GET parameter is normalised with ``correct.correct`` and the
    answer is then taken from the first source that has one:

    1. the cache entry stored under ``"C_" + course``;
    2. the stored record of a course that already exists in ``Courses``;
    3. a fresh search: ``CourseReptile`` supplies candidate book titles, they
       are ranked by Levenshtein similarity to the course name, and
       ``solaSpider`` fetches the books for each title. Every book is stored,
       linked to a newly created ``Courses`` row and added to the response,
       which is then cached.

    Args:
        requset: the incoming request object; only ``requset.GET`` is read.
            (The misspelled parameter name is kept for caller compatibility.)

    Returns:
        An ``HttpResponse`` with the XML payload (``application/xml``), or a
        plain-text ``HttpResponse`` when the parameter is missing or no
        related book was found.
    """
    course = requset.GET.get("course", "")
    if course == "":
        return HttpResponse("Request error")
    # Single-argument, parenthesised prints behave the same on Python 2 and 3.
    print(type(course))
    course = correct.correct(course)
    print(type(course))

    cache_key = "C_" + course
    # 24 hours when the timeout is in seconds (as in Django's cache API).
    cache_ttl = 60 * 60 * 24

    # Fast path: a response for this course is already cached.
    xml = cache.get(cache_key)
    if xml:
        return HttpResponse(xml, content_type="application/xml")

    # Only the lookup itself is guarded, so an error raised while rebuilding
    # the stored record can never be mistaken for "course not found".
    try:
        c = Courses.objects.get(cname=course)
    except Courses.DoesNotExist:
        c = None

    if c is not None:
        # The course was searched before: return the historic result.
        xml = getExistCourseRecord(c)
        cache.set(cache_key, xml, cache_ttl)
        return HttpResponse(xml, content_type="application/xml")

    # First search for this course: find candidate book titles.
    cr = CourseReptile()
    t1 = time.time()
    booksNames = cr.course_search(course)
    print("search books by course cost : " + repr(time.time() - t1) + "s")
    if not booksNames:
        return HttpResponse("No relative book for this course!")

    # objects.create() already persists the row; no extra save() is needed.
    c = Courses.objects.create(cname=course, description="")

    # Rank the candidate titles by similarity to the course name, best first.
    ranked = sorted(
        ((name, Levenshtein.ratio(name, course)) for name in booksNames),
        key=lambda pair: pair[1],
        reverse=True,
    )
    print(ranked)

    xml_parts = []
    for bookName, _ratio in ranked:
        # A new spider per title, exactly as before (it may hold state).
        sola = solaSpider()
        t1 = time.time()
        books = sola.getBookList(bookName, True)
        print("search books by book cost : " + repr(time.time() - t1) + "s")
        for book in books:
            print(book)
            # book is a dictionary; store it and get its identifier back.
            bookid = storeBookItem(book)
            xml_parts.append(getBookItemXml(bookid))
            # Link the new course to this book (create() saves the row).
            Relation.objects.create(course=c, bid=bookid, click=0)

    xml = "".join(xml_parts)
    if xml == "":
        return HttpResponse("No relative book for this course!")
    xml = packXml(xml, c.id, "course")
    cache.set(cache_key, xml, cache_ttl)
    return HttpResponse(xml, content_type="application/xml")
import nltk
from nltk.tokenize import RegexpTokenizer
from correct import correct

# Rewrite mail_data2.txt into new_data.txt token by token: tokens tagged as
# proper nouns ("NNP") are copied unchanged, every other token is lower-cased
# and passed through correct() first. Each input line yields one output line.

# The pattern never changes, so the tokenizer is built once, not per line.
tokenizer = RegexpTokenizer('[A-Za-z.,?!]{1,}')

# Both files are owned by the with-statement, so the output is flushed and
# closed even if tagging or correction raises part-way through. The output is
# opened first, as before.
with open("new_data.txt", "w") as new_file, open("mail_data2.txt", "r") as f:
    for line in f:
        for word, tag in nltk.pos_tag(tokenizer.tokenize(line)):
            if tag != "NNP":
                word = correct(word.lower())
            new_file.write(word + " ")
        new_file.write("\n")
Esempio n. 23
0
def test_correct_more():
    """Check correct() on longer author/title strings.

    In every input the letters O, I and S have been replaced by the digits
    0, 1 and 5; the expected value is the same text with the letters restored.
    Spaces, '.', '-' and the apostrophe appear unchanged in the expected
    output.
    """
    a = "1F-RUDYARD K1PL1NG"
    b = "IF-RUDYARD KIPLING"
    assert correct(a) == b

    a = "R0BERT MERLE - THE DAY 0F THE D0LPH1N"
    b = "ROBERT MERLE - THE DAY OF THE DOLPHIN"
    assert correct(a) == b

    a = "R1CHARD P. FEYNMAN - THE FEYNMAN LECTURE5 0N PHY51C5"
    b = "RICHARD P. FEYNMAN - THE FEYNMAN LECTURES ON PHYSICS"
    assert correct(a) == b

    a = "R1CHARD P. FEYNMAN - 5TAT15T1CAL MECHAN1C5"
    b = "RICHARD P. FEYNMAN - STATISTICAL MECHANICS"
    assert correct(a) == b

    a = "5TEPHEN HAWK1NG - A BR1EF H15T0RY 0F T1ME"
    b = "STEPHEN HAWKING - A BRIEF HISTORY OF TIME"
    assert correct(a) == b

    a = "5TEPHEN HAWK1NG - THE UN1VER5E 1N A NUT5HELL"
    b = "STEPHEN HAWKING - THE UNIVERSE IN A NUTSHELL"
    assert correct(a) == b

    # NOTE(review): "FARWELL" looks like a typo for "FAREWELL"; it is the same
    # in input and expected value, so the assertion itself is unaffected.
    a = "ERNE5T HEM1NGWAY - A FARWELL T0 ARM5"
    b = "ERNEST HEMINGWAY - A FARWELL TO ARMS"
    assert correct(a) == b

    a = "ERNE5T HEM1NGWAY - F0R WH0M THE BELL T0LL5"
    b = "ERNEST HEMINGWAY - FOR WHOM THE BELL TOLLS"
    assert correct(a) == b

    a = "ERNE5T HEM1NGWAY - THE 0LD MAN AND THE 5EA"
    b = "ERNEST HEMINGWAY - THE OLD MAN AND THE SEA"
    assert correct(a) == b

    a = "J. R. R. T0LK1EN - THE L0RD 0F THE R1NG5"
    b = "J. R. R. TOLKIEN - THE LORD OF THE RINGS"
    assert correct(a) == b

    a = "J. D. 5AL1NGER - THE CATCHER 1N THE RYE"
    b = "J. D. SALINGER - THE CATCHER IN THE RYE"
    assert correct(a) == b

    # Apostrophe inside a word: "PH1L050PHER'5" -> "PHILOSOPHER'S".
    a = "J. K. R0WL1NG - HARRY P0TTER AND THE PH1L050PHER'5 5T0NE"
    b = "J. K. ROWLING - HARRY POTTER AND THE PHILOSOPHER'S STONE"
    assert correct(a) == b

    a = "J. K. R0WL1NG - HARRY P0TTER AND THE CHAMBER 0F 5ECRET5"
    b = "J. K. ROWLING - HARRY POTTER AND THE CHAMBER OF SECRETS"
    assert correct(a) == b

    # The mixed-case word "Azkaban" is expected to come back unchanged.
    a = "J. K. R0WL1NG - HARRY P0TTER AND THE PR150NER 0F Azkaban"
    b = "J. K. ROWLING - HARRY POTTER AND THE PRISONER OF Azkaban"
    assert correct(a) == b

    a = "J. K. R0WL1NG - HARRY P0TTER AND THE G0BLET 0F F1RE"
    b = "J. K. ROWLING - HARRY POTTER AND THE GOBLET OF FIRE"
    assert correct(a) == b

    a = "J. K. R0WL1NG - HARRY P0TTER AND THE 0RDER 0F PH0EN1X"
    b = "J. K. ROWLING - HARRY POTTER AND THE ORDER OF PHOENIX"
    assert correct(a) == b

    # Hyphenated word with two adjacent zeros: "HALF-BL00D" -> "HALF-BLOOD".
    a = "J. K. R0WL1NG - HARRY P0TTER AND THE HALF-BL00D PR1NCE"
    b = "J. K. ROWLING - HARRY POTTER AND THE HALF-BLOOD PRINCE"
    assert correct(a) == b

    a = "J. K. R0WL1NG - HARRY P0TTER AND THE DEATHLY HALL0W5"
    b = "J. K. ROWLING - HARRY POTTER AND THE DEATHLY HALLOWS"
    assert correct(a) == b

    a = "UR5ULA K. LE GU1N - A W1ZARD 0F EARTH5EA"
    b = "URSULA K. LE GUIN - A WIZARD OF EARTHSEA"
    assert correct(a) == b

    a = "UR5ULA K. LE GU1N - THE T0MB5 0F ATUAN"
    b = "URSULA K. LE GUIN - THE TOMBS OF ATUAN"
    assert correct(a) == b

    a = "UR5ULA K. LE GU1N - THE FARTHE5T 5H0RE"
    b = "URSULA K. LE GUIN - THE FARTHEST SHORE"
    assert correct(a) == b

    a = "UR5ULA K. LE GU1N - TALE5 FR0M EARTH5EA"
    b = "URSULA K. LE GUIN - TALES FROM EARTHSEA"
    assert correct(a) == b