Ejemplo n.º 1
0
def run():
    """Load ``taxonomy.csv`` into ``SpeciesGroup`` and ``Species`` rows.

    The row at index 0 is skipped (presumably a header row -- confirm
    against the CSV). Columns read from every other row:

    * ``row[1]`` -- slug used to look up the ``Rank``
    * ``row[3]`` -- scientific name; its first word is the genus
    * ``row[4]`` -- standard name
    * ``row[5]`` -- order
    * ``row[6]`` -- family, optionally followed by a parenthesised name
      that is stored as ``name_en`` when ``'en'`` is an enabled language

    One ``Species`` is created per row, with ``id`` and ``order`` equal to
    the row index. Rows sharing the same order/family/genus (and English
    name, when used) share a single ``SpeciesGroup``.
    """
    table = get_data(get_data_path('taxonomy.csv'))
    # Loop-invariant, so evaluate it once instead of once per row.
    english_enabled = 'en' in get_languages()
    next_group_id = 1
    # Every row yields exactly one species, so the row index doubles as the
    # species id. The id must advance on every row: advancing it only when a
    # new group was created would reuse a primary key for the second species
    # of a group.
    for species_id in range(1, len(table)):
        row = table[species_id]
        lookup = {
            'order': row[5],
            'family': row[6].split('(')[0].strip(),
            'genus': row[3].split(' ')[0],
        }
        if english_enabled and '(' in row[6]:
            # Text after the first "(" with the trailing ")" dropped.
            lookup['name_en'] = row[6].split('(')[1][:-1].strip()
        # The id goes in ``defaults`` so it is only used when creating.
        # As a lookup key it could never match an existing group (the
        # counter moves on after every creation), so each row would create
        # a duplicate group.
        group, created = SpeciesGroup.objects.get_or_create(
            defaults={'id': next_group_id}, **lookup)
        if created:
            next_group_id += 1

        Species.objects.create(
            id=species_id,
            include=False,
            order=species_id,
            rank=Rank.objects.get(slug=row[1]),
            group=group,
            standard_name=row[4],
            scientific_name=row[3],
        )
Ejemplo n.º 2
0
def init_settings(data_path):
    """Open the settings database, creating and seeding it on first use.

    The database lives at ``<data_path>/data/settings.sqlite``; the ``data``
    directory is created if it is missing. When the database file does not
    exist yet, the ``lang_settings`` table is created and one row per
    language returned by ``get_languages()`` is inserted with a tab width
    of 4 and spaces enabled. An existing database file is opened as is.

    Args:
        data_path: Directory under which the ``data`` folder is kept.

    Side effects:
        Sets the module globals ``_db_path`` and ``_db_conn``.
    """
    global _db_path
    global _db_conn

    db_dir = os.path.join(data_path, 'data')
    _db_path = os.path.join(db_dir, 'settings.sqlite')

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(db_dir, exist_ok=True)

    # sqlite3.connect() creates a missing database file, so the existence
    # check has to happen before connecting.
    is_new_database = not os.path.exists(_db_path)
    _db_conn = sqlite3.connect(_db_path)
    if not is_new_database:
        return

    _db_conn.execute("""
            CREATE TABLE IF NOT EXISTS lang_settings(
                lang VARCHAR(25) UNIQUE NOT NULL,
                tab_width INTEGER(2) NOT NULL,
                use_spaces INTEGER(1) NOT NULL
            )
        """)

    # Bound parameter instead of string concatenation: a language name
    # containing a quote can no longer break (or alter) the statement.
    _db_conn.executemany(
        'INSERT INTO lang_settings(lang, tab_width, use_spaces) '
        'VALUES(?, 4, 1)',
        ((lang,) for lang in get_languages()))
    _db_conn.commit()
Ejemplo n.º 3
0
    def GET(self):
        """Return, as JSON, the languages whose name starts with ``q``.

        Reads ``q`` (default ``""``) and ``limit`` (default 5) from the
        request input. The prefix match ignores case, and at most ``limit``
        languages are returned.
        """
        params = web.input(q="", limit=5)
        params.limit = safeint(params.limit, 5)
        prefix = params.q.lower()

        matches = []
        for lang in utils.get_languages():
            if lang.name.lower().startswith(prefix):
                matches.append(lang)
        return to_json(matches[:params.limit])
Ejemplo n.º 4
0
def search_engine_3(data_frame, tfidf_vocabulary, query, vocabulary):
    """Interactively rank films by query similarity, then by running time.

    Asks the user to pick a language, scores every row with
    ``utils.cosSim``, keeps the 15 films with the highest similarity,
    scores those with ``utils.R_time_score`` and prints them ordered by
    that score (ascending).

    Args:
        data_frame: pandas DataFrame of films. The columns ``film_id``,
            ``title``, ``intro``, ``Wikipedia_link`` and ``Running time``
            are read here. It is modified in place: the columns
            ``Similarity`` and ``sel_lan`` are added.
        tfidf_vocabulary: Forwarded unchanged to ``utils.cosSim``.
        query: Forwarded unchanged to ``utils.cosSim``.
        vocabulary: Forwarded unchanged to ``utils.cosSim``.

    Returns:
        None. The result table is printed to standard output.
    """
    se3 = data_frame  # alias, not a copy: new columns land on the caller's frame

    languages = utils.get_languages(se3)

    # ask the user for the language
    print("Choose a language from: ")
    print(languages)
    selected_language = input("Select language: ")

    print("You selected: " + selected_language)

    if selected_language not in languages:
        # NOTE(review): execution continues after this message, as it always
        # has -- confirm whether the function should return here instead.
        print("Sorry, your language is not in our dataset")

    # add a column with the similarity ranking
    se3['Similarity'] = se3.apply(utils.cosSim,
                                  axis=1,
                                  tfidf_vocabulary=tfidf_vocabulary,
                                  query=query,
                                  vocabulary=vocabulary)

    # add a column for checking the language
    # NOTE(review): 'sel_lan' is not read again in this function; presumably
    # the results were meant to be filtered by it -- confirm with callers.
    se3['sel_lan'] = se3.apply(utils.selected_films,
                               axis=1,
                               language=selected_language)

    # Pair each film id with the similarity of its first row. Using the ids
    # actually present (rather than a fixed range of 30000) avoids an
    # IndexError on a missing id and a full-frame filter per id.
    first_rows = se3.drop_duplicates(subset='film_id', keep='first')
    scored_films = zip(first_rows['Similarity'], first_rows['film_id'])

    # take the top 15 (similarity, film_id) pairs; nlargest needs no heapify
    top_films = heapq.nlargest(15, scored_films)

    # create a data frame with the top 15 results, best match first
    result_columns = [
        'title', 'intro', 'Wikipedia_link', 'Running time', 'Similarity'
    ]
    frames = [
        se3.loc[se3['film_id'] == film_id, result_columns]
        for _, film_id in top_films
    ]
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    if frames:
        res_sim = pd.concat(frames)
    else:
        res_sim = pd.DataFrame(columns=result_columns)

    # add a column ranking the running time
    res_sim['rank_duration'] = res_sim.apply(utils.R_time_score, axis=1)

    res_sim.sort_values(by=['rank_duration'], inplace=True, ascending=True)

    print(res_sim[['title', 'intro', 'Wikipedia_link',
                   'rank_duration']].to_string())
Ejemplo n.º 5
0
 def GET(self):
     """Return a JSON list of languages matching the ``q`` name prefix.

     ``q`` defaults to ``""`` and ``limit`` to 5. Names are compared
     case-insensitively, and the result is cut to ``limit`` entries.
     """
     request = web.input(q="", limit=5)
     request.limit = safeint(request.limit, 5)

     def matches_query(lang):
         # Case-insensitive "name starts with q" test.
         return lang.name.lower().startswith(request.q.lower())

     found = list(filter(matches_query, utils.get_languages()))
     return to_json(found[:request.limit])