Exemple #1
0
class LawSchema(SchemaClass):
    """Declarative Whoosh schema listing the indexed law fields.

    Heading, history and legal-text fields are ``TEXT`` fields whose values
    are stored with the document; most identifier fields are unstored ``ID``
    fields that can only be matched, not retrieved.
    """

    # Unique key of a document.
    PK = ID(unique=True)

    # Flag and timestamp fields.
    ACTIVE_FLG = BOOLEAN()
    EFFECTIVE_DATE = DATETIME(stored=True)
    TRANS_UPDATE = DATETIME()

    # Identifier fields that are indexed but not stored.
    ARTICLE = ID()
    CHAPTER = ID()
    DIVISION = ID()
    LAW_CODE = ID()
    LAW_SECTION_VERSION_ID = ID()
    LOB_FILE = ID()
    OP_CHAPTER = ID()
    OP_SECTION = ID()
    OP_STATUES = ID()
    PART = ID()
    TITLE = ID()
    TRANS_UID = ID()

    # The only identifier whose value is also stored.
    SECTION_NUM = ID(stored=True)

    # Full-text fields, all stored.
    ARTICLE_HEADING = TEXT(stored=True)
    ARTICLE_HISTORY = TEXT(stored=True)
    CHAPTER_HEADING = TEXT(stored=True)
    CODE_HEADING = TEXT(stored=True)
    DIVISION_HEADING = TEXT(stored=True)
    HISTORY = TEXT(stored=True)
    LEGAL_TEXT = TEXT(stored=True)
    SECTION_HISTORY = TEXT(stored=True)
    SECTION_TITLE = TEXT(stored=True)
Exemple #2
0
def get_schema():
    """Return the Whoosh schema for the document index.

    Every field except ``content`` is stored; the three timestamp fields are
    additionally sortable.
    """
    timestamp_options = {"stored": True, "sortable": True}
    fields = {
        "id": NUMERIC(stored=True, unique=True, numtype=int),
        "title": TEXT(stored=True),
        "content": TEXT(),
        "correspondent": TEXT(stored=True),
        "tag": KEYWORD(stored=True, commas=True, scorable=True, lowercase=True),
        "type": TEXT(stored=True),
        "created": DATETIME(**timestamp_options),
        "modified": DATETIME(**timestamp_options),
        "added": DATETIME(**timestamp_options),
    }
    return Schema(**fields)
Exemple #3
0
def get_schema_tema():
    """Return the Whoosh schema for a topic; every field is stored."""
    fields = {
        "titulo": TEXT(stored=True),
        "link": TEXT(stored=True),
        "autor": TEXT(stored=True),
        "fecha": DATETIME(stored=True),
        "numRespuestas": NUMERIC(stored=True),
        "numVisitas": NUMERIC(stored=True),
    }
    return Schema(**fields)
def get_schema_temas():
    """Return the Whoosh schema for topics, keyed by the unique ``link_tema``.

    The two counters use the ``STORED`` type: they are kept with the document
    but are not searchable.
    """
    fields = {
        "titulo": TEXT(stored=True),
        "link_tema": ID(unique=True, stored=True),
        "autor": KEYWORD(stored=True),
        "fecha": DATETIME(stored=True),
        "n_respuestas": STORED,
        "n_visitas": STORED,
    }
    return Schema(**fields)
    def __init__(self, config):
        """Define the schema, open (or create) the on-disk index and build the parser.

        Args:
            config: mapping providing the index directory under the
                ``'WHOOSH_INDEX'`` key.
        """
        self.schema = Schema(
            id=ID(unique=True),
            # Titles are boosted and indexed as 2-3 character n-grams.
            title=TEXT(
                stored=True,
                field_boost=3.0,
                analyzer=StandardAnalyzer() | NgramFilter(minsize=2, maxsize=3),
            ),
            author=TEXT(stored=True),
            creation_date=DATETIME(stored=True),
            pages=STORED,
            # stoplist=None keeps stop words in the content index.
            content=TEXT(stored=True, analyzer=StandardAnalyzer(stoplist=None)),
            lang=TEXT(stored=True),
            size=STORED,
            tags=KEYWORD(stored=True, commas=True)
        )

        self.index_path = config['WHOOSH_INDEX']

        # First run: create the directory and an empty index inside it.
        # makedirs (rather than mkdir) also creates missing parent
        # directories, so a nested index path no longer fails.
        if not os.path.exists(self.index_path):
            os.makedirs(self.index_path)
            create_in(self.index_path, self.schema)

        self.indexer = open_dir(self.index_path)

        # Queries search title and content, and may contain date expressions.
        self.parser_content = MultifieldParser(["title", "content"], schema=self.schema)
        self.parser_content.add_plugin(DateParserPlugin())

        # Named date filters mapped to date-parser expressions.
        # NOTE(review): 'last_year_to_now' maps to a bracketed two-year range,
        # unlike the other entries - confirm this is intended.
        self.date_format = {
            'last_24h': u'-24h to now',
            'last_week': u'last week',
            'last_month_to_now': u'-1mo to now',
            'last_year_to_now': u"[-2yrs to now]"
        }
Exemple #6
0
def createSearchableDatafromUrl():
    """Crawl pages starting from a fixed URL and index each one with Whoosh.

    Each page's title, body text, publication date, URL and word count are
    added to a freshly created index in ``indexdir``. The last link inside the
    page body is followed as the next page. Crawling stops when a page has no
    links, when the link has no ``href``, or when the link leads to a page
    that was already indexed.

    The documents are committed when the crawl ends normally. If a request or
    a parsing step raises, the pending documents are discarded and the
    exception propagates.
    """
    url = "https://wanderinginn.com/2016/07/27/1-00/"
    if not os.path.exists("indexdir"):
        os.mkdir("indexdir")
    schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT(stored=True),
                    textdata=TEXT(stored=True), date=DATETIME(sortable=True), url=ID(stored=True),
                    wordcount=NUMERIC(stored=True, sortable=True))
    ix = create_in("indexdir", schema)

    # Remember indexed URLs so a link back to an earlier page ends the crawl
    # instead of looping forever.
    visited = set()

    # The writer's context manager commits on a clean exit and cancels (which
    # releases the index lock) if an exception escapes.
    with ix.writer() as writer:
        while url and url not in visited:
            visited.add(url)
            currentPage = requests.get(url)
            soup = BeautifulSoup(currentPage.content, "lxml")
            body = soup.find("div", {"class": "entry-content"})
            title = soup.find("h1", {"class": "entry-title"})
            p_date = soup.find("time", {"class": "entry-date"})
            p_date_converted = datetime.strptime(p_date['datetime'], '%Y-%m-%dT%H:%M:%S+00:00')
            count = len(re.findall(r'\w+', body.text))
            print(title.text)
            print(url)
            print(count)
            print(p_date_converted)
            writer.add_document(title=title.text, content=body.text, textdata=body.text, date=p_date_converted, url=url,
                                wordcount=count)

            # Follow the last link of the body; no links means the crawl is done.
            url_list = body.find_all('a')
            url = url_list[-1].get('href') if url_list else None
Exemple #7
0
def get_schema():
    """Return the Whoosh schema for films.

    ``reparto`` and ``sinopsis`` are passed as the bare ``TEXT`` class, so
    they get ``TEXT``'s default options; the other fields are stored.
    """
    fields = {
        "titulo": TEXT(stored=True),
        "tituloOriginal": TEXT(stored=True),
        "fechaEstreno": DATETIME(stored=True),
        "director": TEXT(stored=True),
        "reparto": TEXT,
        "sinopsis": TEXT,
    }
    return Schema(**fields)
Exemple #8
0
class IndexMsg:
    """A message to be indexed, together with the Whoosh schema describing it."""

    schema = Schema(
        content=TEXT(stored=True, analyzer=ChineseAnalyzer()),
        url=ID(stored=True, unique=True),
        # for `chat_id` we are using TEXT instead of NUMERIC here, because NUMERIC
        # do not support iterating all values of the field
        chat_id=TEXT(stored=True),
        post_time=DATETIME(stored=True, sortable=True),
        sender=TEXT(stored=True),
    )

    def __init__(self, content: str, url: str, chat_id: Union[int, str],
                 post_time: datetime, sender: str):
        """Store the message fields; ``chat_id`` is normalised to ``int``."""
        self.content = content
        self.url = url
        self.chat_id = int(chat_id)
        self.post_time = post_time
        self.sender = sender

    def as_dict(self):
        """Return the fields keyed by schema field name.

        ``chat_id`` is converted back to ``str`` because the schema declares
        it as a TEXT field.
        """
        return {
            'content': self.content,
            'url': self.url,
            'chat_id': str(self.chat_id),
            'post_time': self.post_time,
            'sender': self.sender
        }

    def __str__(self):
        """Return ``IndexMsg(field=value, ...)`` using the ``repr`` of each value."""
        # The previous form glued the class name directly onto the first
        # field ("IndexMsgcontent=..."); wrap the fields in parentheses.
        fields = ', '.join(f'{k}={v!r}' for k, v in self.as_dict().items())
        return f'IndexMsg({fields})'
def _prepare_writer():
    """Create a new index in ``WHOOSH_INDEX_NAME`` and return a writer for it."""
    fields = {
        "title": TEXT(stored=True),
        "path": ID(stored=True),
        # Passed as the bare class, so KEYWORD's default options apply.
        "content": KEYWORD,
        "datetime": DATETIME(stored=True),
    }
    index = create_in(WHOOSH_INDEX_NAME, Schema(**fields))
    return index.writer()
Exemple #10
0
def almacenar_datos():
    """Rebuild the ``Index`` directory from the news module-level lists.

    Calls ``extraer_noticias()`` and then adds one document per entry of
    ``titulos``, reading the matching position of ``categorias``,
    ``enlacesNoticias``, ``descripciones`` and ``fechas``. Finally shows a
    message box with the number of indexed items.
    """
    fields = {
        "categoria": TEXT(stored=True),
        "titulo": TEXT(stored=True),
        "enlaceNoticia": TEXT(stored=True),
        "descripcion": TEXT(stored=True),
        "fecha": DATETIME(stored=True),
    }
    schem = Schema(**fields)

    # Always start from an empty index directory.
    if os.path.exists("Index"):
        shutil.rmtree("Index")
    os.mkdir("Index")

    writer = create_in("Index", schema=schem).writer()
    indexed = 0
    extraer_noticias()
    for pos, titulo in enumerate(titulos):
        writer.add_document(
            categoria=str(categorias[pos]),
            titulo=str(titulo),
            enlaceNoticia=str(enlacesNoticias[pos]),
            descripcion=str(descripciones[pos]),
            fecha=fechas[pos],
        )
        indexed += 1
    writer.commit()

    messagebox.showinfo("Fin de indexado",
                        "Se han indexado " + str(indexed) + " noticias")
Exemple #11
0
def get_thread_schema():
    """Return the Whoosh schema for threads.

    ``respuestas`` and ``visitas`` use the ``STORED`` type: kept with the
    document but not searchable.
    """
    fields = {
        "titulo": TEXT(stored=True),
        "link": ID(stored=True),
        "autor": TEXT(stored=True),
        "fecha": DATETIME(stored=True),
        "respuestas": STORED,
        "visitas": STORED,
    }
    return Schema(**fields)
Exemple #12
0
    def fields_map(self, field_type):
        """Return the Whoosh field matching ``field_type``.

        ``field_type`` may be the string ``"primary"``, one of the type names
        in the lookup table below, a type class from ``types``, or an instance
        of such a class. Unknown names and unmatched types fall back to a
        stored TEXT field using ``self.analyzer``.
        """
        # The primary key gets a unique, stored ID field.
        if field_type == "primary":
            return ID(stored=True, unique=True)

        names_to_types = {
            'date': types.Date,
            'datetime': types.DateTime,
            'boolean': types.Boolean,
            'integer': types.Integer,
            'float': types.Float
        }
        # Resolve a type name; anything unknown is treated as text.
        if isinstance(field_type, str):
            field_type = names_to_types.get(field_type, types.Text)

        # Accept instances as well as classes.
        column_cls = field_type if isinstance(field_type, type) else field_type.__class__

        if issubclass(column_cls, (types.DateTime, types.Date)):
            return DATETIME(stored=True, sortable=True)
        if issubclass(column_cls, types.Integer):
            return NUMERIC(stored=True, numtype=int)
        if issubclass(column_cls, types.Float):
            return NUMERIC(stored=True, numtype=float)
        if issubclass(column_cls, types.Boolean):
            return BOOLEAN(stored=True)
        return TEXT(stored=True, analyzer=self.analyzer, sortable=False)
0
def almacenar_datos():
    """Rebuild the ``Index`` directory from the items of ``extraer_noticias()``.

    Positions 0-5 of each item are indexed as title, author, source, link,
    date/time and content. Finally shows a message box with the number of
    indexed items.
    """
    # Schema of the indexed information; the content is passed as the bare
    # TEXT class, so it gets TEXT's default options.
    fields = {
        "titulo": TEXT(stored=True),
        "autor": TEXT(stored=True),
        "fuente": TEXT(stored=True),
        "link": ID(stored=True),
        "fechahora": DATETIME(stored=True),
        "contenido": TEXT,
    }
    schem = Schema(**fields)

    # Remove the index directory if it exists, then recreate it empty.
    if os.path.exists("Index"):
        shutil.rmtree("Index")
    os.mkdir("Index")

    # Create the index and a writer to add documents to it.
    writer = create_in("Index", schema=schem).writer()
    indexed = 0
    for item in extraer_noticias():
        # Add each news item to the index.
        writer.add_document(
            titulo=str(item[0]),
            autor=str(item[1]),
            fuente=str(item[2]),
            link=str(item[3]),
            fechahora=item[4],
            contenido=str(item[5]),
        )
        indexed += 1
    writer.commit()

    messagebox.showinfo("Fin de indexado",
                        "Se han indexado " + str(indexed) + " noticias")
Exemple #14
0
def get_schema():
    """Return the Whoosh schema for news items; every field is stored."""
    fields = {
        "categoria": TEXT(stored=True),
        "titulo": TEXT(stored=True),
        "enlace": TEXT(stored=True),
        "fecha": DATETIME(stored=True),
        "descripcion": TEXT(stored=True),
        "nombrefichero": ID(stored=True),
    }
    return Schema(**fields)
Exemple #15
0
def get_schema():
    """Return the Whoosh schema for mail messages; every field is stored."""
    fields = {
        "remitente": TEXT(stored=True),
        "destinatarios": TEXT(stored=True),
        "fecha": DATETIME(stored=True),
        "asunto": TEXT(stored=True),
        "contenido": TEXT(stored=True),
        "nombrefichero": ID(stored=True),
    }
    return Schema(**fields)
Exemple #16
0
def almacenar_datos():
    """Rebuild the ``Index`` directory from the film module-level lists.

    Calls ``extraerDatos()`` and then adds one document per entry of
    ``titulos``, reading the matching position of the other lists. Finally
    shows a message box with the number of indexed items.
    """
    fields = {
        "titulo": TEXT(stored=True),
        "tituloOriginal": TEXT(stored=True),
        "fechaEstrenoSpain": DATETIME(stored=True),
        "paises": TEXT(stored=True),
        "generos": TEXT(stored=True),
        "director": TEXT(stored=True),
        "sinopsis": TEXT(stored=True),
    }
    schem = Schema(**fields)

    # Always start from an empty index directory.
    if os.path.exists("Index"):
        shutil.rmtree("Index")
    os.mkdir("Index")

    writer = create_in("Index", schema=schem).writer()
    indexed = 0
    extraerDatos()
    for pos, titulo in enumerate(titulos):
        writer.add_document(
            titulo=str(titulo),
            tituloOriginal=str(titulosOriginales[pos]),
            fechaEstrenoSpain=fechaEstrenoSpain[pos],
            paises=str(paises[pos]),
            # Unlike the other text values, genres are passed through as-is.
            generos=generos[pos],
            director=str(directores[pos]),
            sinopsis=str(sinopsis[pos]),
        )
        indexed += 1
    writer.commit()

    messagebox.showinfo("Fin de indexado",
                        "Se han indexado " + str(indexed) + " películas")
Exemple #17
0
def create():
    """Create a new, empty Whoosh index named ``NAME`` in ``DIRECTORY``.

    Any existing ``DIRECTORY`` tree is deleted first, so all previously
    indexed data is lost.
    """
    import shutil

    # Single-argument call form: valid as both a Python 2 print statement
    # and a Python 3 print() call.
    print('creating new index in directory %s' % DIRECTORY)

    # Delete the old index without going through a shell: portable, and safe
    # for paths containing spaces or shell metacharacters. As with `rm -rf`,
    # a missing directory is not an error.
    shutil.rmtree(DIRECTORY, ignore_errors=True)
    os.mkdir(DIRECTORY)
    schema = Schema(source=ID(stored=True, unique=True),
                    cached=ID(stored=True, unique=True),
                    hash=ID(stored=True, unique=True),
                    title=TEXT(stored=True),
                    author=TEXT(stored=True),
                    year=TEXT(stored=True),
                    notes=TEXT(stored=True),
                    text=TEXT(stored=True),
                    tags=TEXT(stored=True, analyzer=KeywordAnalyzer()),
                    added=DATETIME(stored=True),
                    mtime=DATETIME(stored=True))
    create_in(DIRECTORY, schema, NAME)
Exemple #18
0
class EventSchema(SchemaClass):
    """Declarative Whoosh schema for events; every field is stored."""

    # Each text field gets its own stemming analyzer. The title carries a
    # field boost of 1.5 against 1.0 for the description.
    title = TEXT(analyzer=StemmingAnalyzer(), field_boost=1.5, stored=True)
    description = TEXT(analyzer=StemmingAnalyzer(), field_boost=1.0, stored=True)

    id = NUMERIC(unique=True, stored=True)
    date = DATETIME(stored=True)
Exemple #19
0
class PhotSchema(SchemaClass):
    """Declarative Whoosh schema for photo entries.

    Only the two unique identifiers are stored; the remaining fields are
    index-only.
    """

    # Unique, stored identifiers.
    id = ID(unique=True, stored=True)
    short_code = ID(unique=True, stored=True)

    # Analyzed text fields with spelling support enabled.
    title = TEXT(spelling=True, analyzer=custom_analyzer)
    tags = TEXT(spelling=True, analyzer=tag_analyzer)

    # Index-only metadata.
    user = ID()
    created = DATETIME()
    disable = BOOLEAN()
Exemple #20
0
def get_schema():
    """Return the Whoosh schema for threads; every field is stored.

    The reply and visit values are declared as TEXT here, not as numeric
    fields.
    """
    fields = {
        "title": TEXT(stored=True),
        "link": TEXT(stored=True),
        "autor": TEXT(stored=True),
        "date": DATETIME(stored=True),
        "respuestas": TEXT(stored=True),
        "visitas": TEXT(stored=True),
        "respuestasText": TEXT(stored=True),
    }
    return Schema(**fields)
Exemple #21
0
 def __init__(self, location):
     """Remember ``location`` and define the index schema.

     ``location`` must be truthy; this is checked with an ``assert``.
     """
     assert location
     self.location = location

     # ``repo_files`` uses the STORED type: kept with the document but not
     # searchable.
     fields = {
         "uuid": ID(stored=True, unique=True),
         "sender": TEXT(stored=True),
         "recipient": TEXT(stored=True),
         "keywords": TEXT(stored=True),
         "date": DATETIME(stored=True),
         "repo_files": STORED,
     }
     self.schema = Schema(**fields)
Exemple #22
0
def make_schema():
    """Return the Whoosh schema for papers; every field is stored.

    Title and abstract each get their own stemming analyzer; the keyword
    fields are lowercased.
    """
    fields = {
        "paper_field": KEYWORD(stored=True, lowercase=True, scorable=True),
        "title": TEXT(stored=True, analyzer=StemmingAnalyzer()),
        "authors": KEYWORD(stored=True, lowercase=True),
        "pdf": ID(stored=True),
        "abstract": TEXT(stored=True, analyzer=StemmingAnalyzer()),
        "date": DATETIME(stored=True),
    }
    return Schema(**fields)
Exemple #23
0
def init_extensions(app):
    """Bind the extensions to ``app``, register admin views and the posts index.

    The admin views and the Whoosh ``posts`` index are set up inside the
    application context.
    """
    global use_cache
    whoosh_searcher.init_app(app)
    configure_uploads(app, upload_photos)
    mail.init_app(app)
    admin.init_app(app)
    mongo.init_app(app, "MONGO")
    oauth.init_app(app)
    login_manager.init_app(app)
    # Cache initialisation (driven by the USE_CACHE setting) is disabled.

    with app.app_context():
        # flask-admin views, registered in this order:
        # (view class, Mongo collection, label, menu category or None).
        admin_views = (
            (admin_view.RolesModelView, 'roles', '角色管理', None),
            (admin_view.UsersModelView, 'users', '用户管理', None),
            (admin_view.CatalogsModelView, 'catalogs', '栏目管理', '内容管理'),
            (admin_view.PostsModelView, 'posts', '帖子管理', '内容管理'),
            (admin_view.PassagewaysModelView, 'passageways', '温馨通道', '推广管理'),
            (admin_view.FriendLinksModelView, 'friend_links', '友链管理', '推广管理'),
            (admin_view.PagesModelView, 'pages', '页面管理', '推广管理'),
            (admin_view.FooterLinksModelView, 'footer_links', '底部链接', '推广管理'),
            (admin_view.AdsModelView, 'ads', '广告管理', '推广管理'),
            (admin_view.OptionsModelView, 'options', '系统设置', None),
        )
        for view_cls, collection, label, category in admin_views:
            # Views without a category are created without that keyword.
            extra = {} if category is None else {'category': category}
            admin.add_view(view_cls(mongo.db[collection], label, **extra))

        # Initialise the Whoosh index; one analyzer instance serves both
        # text fields.
        chinese_analyzer = ChineseAnalyzer()
        post_fields = {
            'obj_id': ID(unique=True, stored=True),
            'title': TEXT(stored=True, analyzer=chinese_analyzer),
            'content': TEXT(stored=True, analyzer=chinese_analyzer),
            'create_at': DATETIME(stored=True),
            'catalog_id': ID(stored=True),
            'user_id': ID(stored=True),
        }
        whoosh_searcher.add_index('posts', Schema(**post_fields))
Exemple #24
0
def get_schema():
    """Return the Whoosh schema for match reports.

    The goal counts, ``autor`` and ``cronica`` are passed as bare field
    classes, so they get those classes' default options; the other fields
    are stored.
    """
    fields = {
        "numeroJornada": NUMERIC(stored=True),
        "local": TEXT(stored=True),
        "visitante": TEXT(stored=True),
        "golesLocales": NUMERIC,
        "golesVisitantes": NUMERIC,
        "fecha": DATETIME(stored=True),
        "autor": TEXT,
        "titulo": TEXT(stored=True),
        "cronica": TEXT,
    }
    return Schema(**fields)
Exemple #25
0
def get_schema():
    """Return the Whoosh schema for the document index.

    Only ``id`` is stored. Title, ASN, correspondent, type and the three
    timestamps are sortable.
    """
    fields = {
        "id": NUMERIC(stored=True, unique=True),
        "title": TEXT(sortable=True),
        "content": TEXT(),
        "asn": NUMERIC(sortable=True),
        # Correspondent: name, id and presence flag.
        "correspondent": TEXT(sortable=True),
        "correspondent_id": NUMERIC(),
        "has_correspondent": BOOLEAN(),
        # Tags: names, ids and presence flag.
        "tag": KEYWORD(commas=True, scorable=True, lowercase=True),
        "tag_id": KEYWORD(commas=True, scorable=True),
        "has_tag": BOOLEAN(),
        # Document type: name, id and presence flag.
        "type": TEXT(sortable=True),
        "type_id": NUMERIC(),
        "has_type": BOOLEAN(),
        # Timestamps.
        "created": DATETIME(sortable=True),
        "modified": DATETIME(sortable=True),
        "added": DATETIME(sortable=True),
    }
    return Schema(**fields)
def get_schema():
    """Return the Whoosh schema for posts; every field is stored.

    Title and content share one analyzer: a stemming analyzer built with the
    ``STOP`` list, piped through a stop filter using the same list.
    """
    text_analyzer = StemmingAnalyzer(stoplist=STOP) | StopFilter(stoplist=STOP)
    fields = {
        "title": TEXT(analyzer=text_analyzer, stored=True, sortable=True),
        "content": TEXT(analyzer=text_analyzer, stored=True, sortable=True),
        "tags": KEYWORD(commas=True, stored=True),
        "author": TEXT(stored=True),
        "uid": ID(unique=True, stored=True),
        "lastedit_date": DATETIME(sortable=True, stored=True),
    }
    return Schema(**fields)
Exemple #27
0
def get_schema():
    """Return the Whoosh schema for match reports.

    Every field is stored except ``texto``, which is passed as the bare
    ``TEXT`` class and so gets ``TEXT``'s default options.
    """
    fields = {
        "jornada": TEXT(stored=True),
        "local": TEXT(stored=True),
        "visitante": TEXT(stored=True),
        "resultado": TEXT(stored=True),
        "fecha": DATETIME(stored=True),
        "autor": TEXT(stored=True),
        "titular": TEXT(stored=True),
        "titulo": TEXT(stored=True),
        "texto": TEXT,
    }
    return Schema(**fields)
Exemple #28
0
    def build_schema(self, fields):
        """Translate search-index field definitions into a Whoosh schema.

        Args:
            fields: mapping whose values describe one index field each
                (``index_fieldname``, ``field_type``, ``is_multivalued``,
                ``indexed``, ``stored``, ``boost`` and ``document`` are read).

        Returns:
            A ``(content_field_name, schema)`` tuple. ``content_field_name``
            is the index name of the last field flagged as ``document``, or
            ``''`` when there is none.

        Raises:
            SearchBackendError: if ``fields`` contributes no field beyond the
                three built-in keys.
        """
        schema_fields = {
            ID: WHOOSH_ID(stored=True, unique=True),
            DJANGO_CT: WHOOSH_ID(stored=True),
            DJANGO_ID: WHOOSH_ID(stored=True),
        }
        # Number of built-in keys, used below to detect an empty field set.
        initial_key_count = len(schema_fields)
        content_field_name = ''

        for _, field_class in fields.items():
            name = field_class.index_fieldname

            if field_class.is_multivalued:
                if field_class.indexed is False:
                    whoosh_field = IDLIST(stored=True, field_boost=field_class.boost)
                else:
                    whoosh_field = KEYWORD(stored=True, commas=True, scorable=True,
                                           field_boost=field_class.boost)
            elif field_class.field_type in ['date', 'datetime']:
                whoosh_field = DATETIME(stored=field_class.stored, sortable=True)
            elif field_class.field_type == 'integer':
                whoosh_field = NUMERIC(stored=field_class.stored, numtype=int,
                                       field_boost=field_class.boost)
            elif field_class.field_type == 'float':
                whoosh_field = NUMERIC(stored=field_class.stored, numtype=float,
                                       field_boost=field_class.boost)
            elif field_class.field_type == 'boolean':
                # Field boost isn't supported on BOOLEAN as of 1.8.2.
                whoosh_field = BOOLEAN(stored=field_class.stored)
            elif field_class.field_type == 'ngram':
                whoosh_field = NGRAM(minsize=3, maxsize=15, stored=field_class.stored,
                                     field_boost=field_class.boost)
            elif field_class.field_type == 'edge_ngram':
                whoosh_field = NGRAMWORDS(minsize=2, maxsize=15, at='start',
                                          stored=field_class.stored,
                                          field_boost=field_class.boost)
            else:
                # Plain text uses the Chinese word-segmentation analyzer
                # (a commented-out variant used StemmingAnalyzer instead).
                whoosh_field = TEXT(stored=True, analyzer=ChineseAnalyzer(),
                                    field_boost=field_class.boost, sortable=True)

            schema_fields[name] = whoosh_field

            # The document field becomes the content field and gets spelling
            # support switched on.
            if field_class.document is True:
                content_field_name = name
                whoosh_field.spelling = True

        # Fail with a clear message rather than letting the backend die when
        # no fields were found.
        if len(schema_fields) <= initial_key_count:
            raise SearchBackendError(
                "No fields were found in any search_indexes. Please correct this before attempting to search.")

        return content_field_name, Schema(**schema_fields)
Exemple #29
0
class NoteSchema(SchemaClass):
    """Declarative Whoosh schema for notes.

    Only the two unique identifiers are stored; the remaining fields are
    index-only.
    """

    # Unique, stored identifiers.
    id = ID(unique=True, stored=True)
    short_code = ID(unique=True, stored=True)

    # Text fields with spelling support enabled; title and tags use custom
    # analyzers, contents uses TEXT's default.
    title = TEXT(spelling=True, analyzer=custom_analyzer)
    contents = TEXT(spelling=True)
    tags = TEXT(spelling=True, analyzer=tag_analyzer)

    # Index-only metadata.
    user = ID()
    created = DATETIME()

    # Boolean flags.
    public = BOOLEAN()
    draft = BOOLEAN()
    disable = BOOLEAN()
    reported = BOOLEAN()
Exemple #30
0
def get_index():
    """Return the Whoosh index in ``search-index``, creating it on first use."""
    index_dir = "search-index"

    # An existing directory is opened as-is.
    if os.path.isdir(index_dir):
        from whoosh.index import open_dir
        return open_dir(index_dir)

    # Otherwise define the schema, then create the directory and a new index.
    fields = {
        "title": TEXT(stored=True),
        "url": ID(stored=True, unique=True),
        "content": TEXT(stored=True),
        "modified": DATETIME(sortable=True),
    }
    schema = Schema(**fields)
    os.mkdir(index_dir)
    return create_in(index_dir, schema)