Ejemplo n.º 1
0
def test():
    """Smoke-test basic RediSearch usage: create an index, add a doc, search.

    Fixed: the original used Python 2 `print` statements, which are a
    SyntaxError under Python 3.
    """
    # Creating a client with a given index name
    client = Client('myIndex')

    # Recreate the index definition and schema from scratch
    client.drop_index()
    client.create_index([TextField('title', weight=5.0), TextField('body')])

    # Indexing a document
    client.add_document(
        'doc1',
        title='RediSearch',
        body='Redisearch implements a search engine on top of redis')

    # Simple search
    res = client.search("search engine")

    # the result has the total number of results, and a list of documents
    print(res.total)  # "1"
    print(res.docs[0])

    # Searching with snippets
    # res = client.search("search engine", snippet_sizes={'body': 50})

    # Searching with complex parameters:
    q = Query("search engine").verbatim().no_content().paging(0, 5)
    res = client.search(q)
Ejemplo n.º 2
0
 def start(self, data, index_name):
     """Rebuild the RediSearch index *index_name* and load *data* into it.

     Returns a one-element list with the last progress status reached
     (5 means every step completed).
     """
     status = 1
     self.drop_index()
     self.client = Client(index_name, self.host, self.port)
     status = 2
     # One numeric position field plus the text payload/metadata fields.
     schema = [NumericField('INDEX')]
     schema.extend(
         TextField(field_name)
         for field_name in ('DATA', 'SECTION_TYPE', 'DOCID', 'PAGE', 'GRIDID',
                            'ROWCOL', 'BBOX', 'PAGE_GRID_SE', 'Rowspan',
                            'Colspan'))
     status = 3
     self.add_indexing_schema(schema)
     status = 4
     self.add_data(data, index_name)
     status = 5
     return [status]
Ejemplo n.º 3
0
def create_product_search_index_async():
    """Build the 'productIndex' RediSearch index from all active products.

    Progress is published via the cache keys 'Search_index_total' and
    'Search_index_index' so another process can poll indexing progress.

    Fixed: Python 2 `print` statement; manual counter replaced by
    enumerate (same 0-based values written to the cache).
    """
    print('Creating Search Index')
    client = Client('productIndex')
    client.create_index([
        TextField('title', weight=5.0),
        TextField('description'),
        TextField('tags'),
        TextField('category')
    ])
    products = Product.objects.filter(active=True)
    cache.set('Search_index_total', len(products), timeout=None)
    for index, product in enumerate(products):
        title = product.name
        description = product.description
        category = ','.join([cat.name for cat in product.category.all()])
        # Concatenate the product's own tags with its mapped tags.
        tag = product.tags
        for tag_map in ProductTagMap.objects.filter(product=product):
            tag = tag + tag_map.tag.tag + ' '
        # Distinct variant values become extra searchable tag tokens.
        category_varients = []
        for pv in ProductVarientList.objects.filter(product=product):
            for cv in pv.key.all():
                category_varients.append(cv.value)
        tag += ' '.join(list(set(category_varients)))
        client.add_document(str(product.id),
                            title=title,
                            description=description,
                            tags=tag,
                            category=category)
        cache.set('Search_index_index', index, timeout=None)
    return True
Ejemplo n.º 4
0
def cache_to_redis(data: dict):
    """Index *data* (mapping of game key -> game dict) into the 'games' index.

    Retries index creation until it succeeds, then adds one document per
    game, flattening the variable-length alt_names list into numbered
    alt_name_<i> text fields.

    Fixed: 'cover' and 'thumb' were previously passed as extra positional
    arguments to create_index() instead of being inside the schema list;
    the retry message contained an unfilled '%s'.
    """
    if REDIS_HOSTNAME == '':
        print('REDIS_HOSTNAME environment variable is not set')
        return
    client = Client('games', host=REDIS_HOSTNAME, port=REDIS_PORT)
    indexCreated = False
    # The widest alt_names list determines how many alt_name_<i> fields exist.
    maxAltNames = len(max(data.values(), key=lambda d: len(d['alt_names']))['alt_names'])
    while not indexCreated:
        try:
            client.create_index([TextField('name', weight=10),
                                *[TextField('alt_name_%d' % i, weight=10) for i in range(maxAltNames)],
                                TextField('summary', weight=1),
                                TextField('cover', weight=0),
                                TextField('thumb', weight=0)])
            indexCreated = True
        except Exception as e:
            print('Failed to create index, retrying: %s' % e)
            time.sleep(3)

    for k, v in data.items():
        client.add_document(k,
                            name=v['name'],
                            **{'alt_name_%d' % i: n for i, n in enumerate(v['alt_names'])},
                            cover=v['cover'],
                            thumb=v['thumb'],
                            summary=v['summary'])
    print('done')
Ejemplo n.º 5
0
    def test_search(self):
        """End-to-end check: create an index, store one hash, search it."""
        pool = RedisPool(urls=("localhost", 6379))
        search = pool.search("def_index")
        self.assertIsNotNone(search)

        # IndexDefinition is available for RediSearch 2.0+
        definition = IndexDefinition(prefix=['doc:', 'article:'])

        # Schema: a weighted title field plus a plain body field.
        schema = (TextField("title", weight=5.0), TextField("body"))
        search.create_index(schema, definition=definition)

        doc_fields = {
            'title': 'RediSearch',
            'body': 'Redisearch impements a search engine on top of redis',
        }
        search.hset('doc:1', mapping=doc_fields)

        # A matching query must find exactly the one indexed document.
        res = search.search("search engine")
        self.assertEqual(res.total, 1)
        self.assertEqual(res.docs[0].title, "RediSearch")
Ejemplo n.º 6
0
 def to_search_field(self):
     """Convert this field description into a redisearch schema field.

     'int' fields become NumericField; every other type — including
     'str' and 'map' — becomes TextField.

     Fixed: the 'str'/'map' branch and the fallback branch were exact
     duplicates, so they are merged into one TextField construction.
     """
     if self.type == 'int':
         return NumericField(name=self.name,
                             sortable=self.sortable,
                             no_index=self.no_index)
     return TextField(name=self.name,
                      weight=self.weight,
                      sortable=self.sortable,
                      no_stem=self.no_stem,
                      no_index=self.no_index,
                      phonetic_matcher=self.phonetic_matcher)
Ejemplo n.º 7
0
def build_ipa_index():
    """Build the 'IPAIndex' from the indicepa.gov.it open-data files.

    Downloads `amministrazioni.txt` and `ou.txt`, (re)creates the index,
    then feeds administrations first and their digital-transition office
    contacts as partial updates.

    Fixed: bare `except:` narrowed to `except Exception:` so
    SystemExit/KeyboardInterrupt are no longer swallowed.
    """
    start_time = time.time()
    rc = redis.Redis(password=os.environ.get('REDIS_PASSWORD', ''))
    rs_client = Client('IPAIndex', conn=rc)

    print(
        'Getting file `amministrazioni.txt` from https://www.indicepa.gov.it',
        flush=True)
    ipa_index_amm_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=amministrazioni.txt'
    ipa_index_amm = pd.read_csv(ipa_index_amm_url, sep='\t', dtype=str)

    print('Getting file `ou.txt` from https://www.indicepa.gov.it', flush=True)
    ipa_index_ou_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=ou.txt'
    ipa_index_ou = pd.read_csv(ipa_index_ou_url,
                               sep='\t',
                               na_values=['da_indicare', '*****@*****.**'],
                               dtype=str)
    # Keep only the digital-transition office rows.
    ipa_index_ou = ipa_index_ou.loc[lambda ipa_index_ou: ipa_index_ou['cod_ou']
                                    == 'Ufficio_Transizione_Digitale']

    try:
        rs_client.drop_index()
    except Exception:
        pass  # Index already dropped

    rs_client.create_index([
        TextField('ipa_code', weight=2.0),
        TextField('name', weight=2.0, sortable=True),
        TextField('site'),
        TextField('pec'),
        TextField('city', weight=1.4),
        TextField('county'),
        TextField('region'),
        TagField('type'),
        TextField('rtd_name'),
        TextField('rtd_pec'),
        TextField('rtd_mail'),
    ])
    print('Created index `IPAIndex`', flush=True)

    print('Feeding `IPAIndex` with data from `amministrazioni.txt`',
          flush=True)
    for index, row in ipa_index_amm.iterrows():
        rs_client.add_document(row['cod_amm'],
                               language='italian',
                               replace=True,
                               **get_ipa_amm_item(row))

    print('Feeding `IPAIndex` with data from `ou.txt`', flush=True)
    for index, row in ipa_index_ou.iterrows():
        # partial=True merges RTD contact fields into the existing doc.
        rs_client.add_document(row['cod_amm'],
                               partial=True,
                               **get_ipa_rtd_item(row))

    finish_time = time.time()
    print('`IPAIndex` build completed in {0} seconds'.format(
        round(finish_time - start_time, 2)),
          flush=True)
Ejemplo n.º 8
0
 def __init__(self, client):
     """Keep the search client and describe the area CSV schema."""
     self.client = client
     self.file_path = "./index-data/area.csv"
     # Numeric row index and area id, plus two free-text columns.
     numeric_fields = (NumericField("index"), NumericField("areaId"))
     text_fields = (TextField("areaTitle"), TextField("areaBody"))
     self.fields = numeric_fields + text_fields
Ejemplo n.º 9
0
def to_field(k, v):
    """Map a type tag *v* to a redisearch field named *k*.

    BOOL and NUMERIC fields are sortable; unrecognised tags fall back
    to a geo field.
    """
    builders = {
        "BOOL": lambda name: TextField(name, sortable=True),
        "NUMERIC": lambda name: NumericField(name, sortable=True),
        "TEXT": TextField,
        "TAG": TagField,
    }
    return builders.get(v, GeoField)(k)
Ejemplo n.º 10
0
def import_brewery_geo(r, rsclient):
    """Index brewery geo data into RediSearch.

    Reads rows from the geo CSV, joins each with the brewery hash already
    stored in redis under `<brewery>:<id>`, and upserts one searchable
    document per brewery.
    """
    # Create the brewery redisearch index.
    rsclient.create_index([
        TextField('name', weight=5.0),
        TextField('address'),
        TextField('city'),
        TextField('state'),
        TextField('country'),
        NumericField('id', sortable=True),
        GeoField('location')
    ])

    with open(brewerygeofile) as geofile:
        reader = csv.reader(geofile)
        next(reader, None)  # skip the header line
        for row in reader:
            # use the brewery id to generate the brewery key added earlier
            brewery_key = "{}:{}".format(brewery, row[1])

            # get all the data from the brewery hash
            binfo = r.hgetall(brewery_key)
            if not any(binfo):
                print("\tERROR: Missing info for {}, skipping geo import".format(brewery_key))
                continue

            # Build the document fields outside the try so a malformed
            # hash (missing key) still raises, as before.
            doc_fields = {
                'name': binfo[b'name'].decode(),
                'address': binfo[b'address1'].decode(),
                'city': binfo[b'city'].decode(),
                'state': binfo[b'state'].decode(),
                'country': binfo[b'country'].decode(),
                'id': row[1],
                'location': "{},{}".format(row[3], row[2])
            }
            # add the brewery document to the index
            try:
                rsclient.add_document(
                    "brewery:{}".format(row[1]),
                    score=1.0,
                    replace=True,
                    partial=True,
                    **doc_fields
                )
            except Exception as e:
                print("\tERROR: Failed to add document for {}: {}".format(brewery_key, e))
                continue
Ejemplo n.º 11
0
def create_website_items_index():
    "Creates Index Definition."

    # CREATE index
    client = Client(make_key(WEBSITE_ITEM_INDEX), conn=frappe.cache())

    # DROP if already exists (best-effort; ignore failures)
    try:
        client.drop_index()
    except Exception:
        pass

    idx_def = IndexDefinition([make_key(WEBSITE_ITEM_KEY_PREFIX)])

    # Pull the configurable field list from e-commerce settings.
    configured = frappe.db.get_single_value('E Commerce Settings',
                                            'search_index_fields')
    idx_fields = configured.split(',') if configured else []

    # web_item_name always leads the schema; avoid listing it twice.
    if 'web_item_name' in idx_fields:
        idx_fields.remove('web_item_name')

    schema = [TextField("web_item_name", sortable=True)]
    schema.extend(map(to_search_field, idx_fields))

    client.create_index(schema, definition=idx_def)

    reindex_all_web_items()
    define_autocomplete_dictionary()
Ejemplo n.º 12
0
def create_website_items_index():
    "Creates Index Definition."

    # CREATE index
    client = Client(make_key(WEBSITE_ITEM_INDEX), conn=frappe.cache())

    try:
        client.drop_index()  # drop if already exists
    except ResponseError:
        # will most likely raise a ResponseError if index does not exist
        # ignore and create index
        pass
    except Exception:
        raise_redisearch_error()

    idx_def = IndexDefinition([make_key(WEBSITE_ITEM_KEY_PREFIX)])

    # Index fields mentioned in e-commerce settings
    configured = frappe.db.get_single_value("E Commerce Settings",
                                            "search_index_fields")
    idx_fields = configured.split(",") if configured else []

    # web_item_name always leads the schema; avoid listing it twice.
    if "web_item_name" in idx_fields:
        idx_fields.remove("web_item_name")

    schema = [TextField("web_item_name", sortable=True)]
    schema += [to_search_field(field) for field in idx_fields]

    client.create_index(schema, definition=idx_def)

    reindex_all_web_items()
    define_autocomplete_dictionary()
Ejemplo n.º 13
0
 def __init__(self):
     """Connect to the event broker and ensure the CCTV index exists."""
     self.r = redis.from_url(config.EVENT_BROKER_URL)
     self.client = Client('CCTV_DATA')
     try:
         self.client.create_index([TextField('CCTV_ID'), TagField('TAGS')])
     except Exception as error:
         # Typically raised when the index already exists.
         print("Error while creating index", error)  # typo fixed: "creatign"
Ejemplo n.º 14
0
    def __init__(self, hostname: str) -> None:
        """Create (or recreate) the 'movies' search index on *hostname*.

        Fixed: bare `except:` narrowed to `except Exception:` so
        SystemExit/KeyboardInterrupt are not swallowed.
        """
        self._idx_name = "movies"
        super().__init__(hostname, self._idx_name)
        try:
            # Drop any pre-existing index so the schema below is authoritative.
            info_exists = self._client.info()
            if info_exists:
                self._client.drop_index()
        except Exception:
            # info() raises when the index does not exist yet; nothing to drop.
            pass
        definition = IndexDefinition(prefix=[f'{self._idx_name}:'])

        self._client.create_index((
            TextField('Title'),
            TextField('Plot'),
            TextField('imdbID'),
        ),
                                  definition=definition)
Ejemplo n.º 15
0
 def start(self, data, doc_id, company, project):
     """Rebuild the per-document index and load *data* into it.

     Returns a one-element list with the last progress status reached
     (5 means every step completed).
     """
     status = 1
     index_name = project + "_DOCUMENT_" + str(doc_id)
     self.drop_index()
     self.client = Client(index_name, self.host, self.port)
     status = 2
     # One numeric position field plus the text payload fields.
     schema = [NumericField('INDEX')]
     schema.extend(TextField(name) for name in ('DATA', 'PAGE', 'BBOX'))
     status = 3
     self.add_indexing_schema(schema)
     status = 4
     self.add_data(data, company, doc_id, project)
     status = 5
     return [status]
Ejemplo n.º 16
0
    def createHub(self):
        """Initialise the hub JSON object and its RediSearch module index."""
        logger.info('Creating the hub in the database {}'.format(self._ts))
        # Store the master modules catalog as an object
        hub_payload = {
            'created': str(_toepoch(self._ts)),
            'modules': {},
            'submissions': [],
            'submit_enabled': False
        }
        self.dconn.jsonset(self._hubkey, Path.rootPath(), hub_payload)

        # Create a RediSearch index for the modules
        # TODO: catch errors
        module_schema = (TextField('name', sortable=True),
                         TextField('description'),
                         NumericField('stargazers_count', sortable=True),
                         NumericField('forks_count', sortable=True),
                         NumericField('last_modified', sortable=True))
        self.sconn.create_index(module_schema, stopwords=stopwords)
Ejemplo n.º 17
0
    def create_index_definition(self, drop_existing=False):
        """
        Create an index definition. Do nothing if it already exists.
        """
        if drop_existing:
            self.client.drop_index()

        definition = IndexDefinition(prefix=[self.keys.pre("resource:")])

        # Weighted text schema; repo_name matters slightly more than body.
        schema = [
            TextField('body', weight=1),
            TextField('repo_name', weight=1.5),
            TextField('language', weight=1),
            TextField('lists'),
        ]
        try:
            self.client.create_index(schema, definition=definition)
        except ResponseError:
            print("Index already exists.")
Ejemplo n.º 18
0
    def build_index(self, line_doc_path, n_docs):
        """Index up to *n_docs* documents from the line-doc file.

        Documents are added with nosave=True, so only the inverted index
        is kept, not the document payloads.

        Fixed: Python 2 `print` statement; bare `except:` narrowed.
        """
        line_pool = LineDocPool(line_doc_path)

        try:
            self.client.drop_index()
        except Exception:
            pass  # index did not exist yet

        self.client.create_index([TextField('title'), TextField('url'), TextField('body')])

        for i, d in enumerate(line_pool.doc_iterator()):
            self.client.add_document(i, nosave = True, title = d['doctitle'],
                    url = d['url'], body = d['body'])

            if i + 1 == n_docs:
                break

            if i % 1000 == 0:
                print("{}/{} building index".format(i, n_docs))
Ejemplo n.º 19
0
    def get(self, request):
        """Search courses by keyword via a transient RediSearch index.

        BUG FIX: the original created the index under one
        'CII' + str(datetime.now()) name and then built a *second* client
        with a different timestamped name for searching, so the query
        could never hit the index that was just populated. The index name
        is now computed once and reused.
        """
        mes = {}
        search_key = request.GET.get('key')
        print(search_key)
        all_classes = Course.objects.all()
        # Create one client with a single timestamped index name.
        index_name = 'CII' + str(datetime.now())
        client = Client(index_name, host=settings.SIP, port='6666')

        # Create the index definition and schema.
        client.create_index((TextField('title'), TextField('body')))

        for i in all_classes:
            print(str(i.id) + str(i.title))
            # The course id is embedded in the title after '@' so it can be
            # recovered from search hits below.
            client.add_document('result' + str(datetime.now()), title=i.title + '@' + str(i.id), info=i.info,
                                language='chinese')
        print(client.info())

        # Query the same index that was just populated.
        res = client.search(search_key)
        id_list = []
        print(res.docs)
        for i in res.docs:
            # Split the title on '@' to recover the course id for the lookup.
            id = i.title.split('@')[1]
            id_list.append(id)
        course = Course.objects.filter(id__in=id_list).all()
        c = CourseSerializersModel(course, many=True)
        mes['course'] = c.data
        mes['code'] = 200
        mes['message'] = '搜索完毕'
        return Response(mes)
Ejemplo n.º 20
0
    def create_user_index(self, users):
        """
        Creates a new user index if not exists.

        :param users: iterable of user objects to index
        :return: True if the index was created and populated,
                 False if it already existed.
        """
        definition = IndexDefinition(prefix=['doc:', 'user:'])

        try:
            self.client.create_index(
                (TextField("first_name"), TextField("last_name"),
                 TextField("email"), NumericField("age"),
                 NumericField("is_employee"),
                 NumericField("user_id", sortable=True)),
                definition=definition)
        except redis.exceptions.ResponseError:
            # Index already exists; nothing to do.
            return False

        indexer = self.client.batch_indexer(chunk_size=len(users))

        # Hoisted out of the loop: the original rebuilt this translation
        # table ('-' -> '\-') three times per user.
        escape = str.maketrans({"-": r"\-"})

        for user in users:
            fields = {
                "first_name": user.first_name.translate(escape),
                "last_name": user.last_name.translate(escape),
                "email": user.email.translate(escape),
                "age": user.age,
                "user_id": user.id,
                "is_employee": int(user.is_employee),
            }
            indexer.add_document(f"doc:{user.id}", **fields)
        indexer.commit()

        return True
Ejemplo n.º 21
0
    def clientpush(self):
        """Index the customers table into 'Checkout' and run demo queries."""
        client = Client('Checkout')

        # Schema mirrors the columns of the customers table, in order.
        client.create_index([
            NumericField('Key'),
            TextField('UsageClass'),
            TextField('CheckoutType'),
            TextField('MaterialType'),
            NumericField('CheckoutYear'),
            NumericField('CheckoutMonth'),
            NumericField('Checkouts'),
            TextField('Title'),
            TextField('Creator'),
            TextField('Subjects'),
            TextField('Publisher'),
            TextField('PublicationYear')
        ])

        db_connection, _ = self.connect()
        cursor = db_connection.cursor()
        cursor.execute('SELECT * FROM customers')

        # Column order matches the SELECT * result tuples.
        columns = ('Key', 'UsageClass', 'CheckoutType', 'MaterialType',
                   'CheckoutYear', 'CheckoutMonth', 'Checkouts', 'Title',
                   'Creator', 'Subjects', 'Publisher', 'PublicationYear')
        for i, result in enumerate(cursor.fetchall()):
            client.add_document('doc%s' % i, **dict(zip(columns, result)))
            print(i + 1)

        # Demo queries against the freshly built index.
        res = client.search('BOOK')
        print("{}   {}".format(res.total, res.docs[0].Title))
        res1 = client.search("use")
        print(res1)
        q = Query('use').verbatim().no_content().paging(0, 5)
        res1 = client.search(q)
        print(res1)
        cursor.close()
        db_connection.close()
def insert():
    """Load movie rows into the search index and the autocompleter."""
    # insertion of search/suggestion data
    suggestion_client = Client('movie')
    suggestion_client.create_index([TextField('title'), TagField('genres', separator = '|')])

    for i in range(len(movie_df)):
        suggestion_client.add_document(movie_df['tmdbId'][i],
                                       title=movie_df['title'][i],
                                       genres=movie_df['genres'][i])

    # insertion of auto-completion data
    completion_client = AutoCompleter('ac')
    for i in range(len(movie_df)):
        completion_client.add_suggestions(Suggestion(movie_df['title'][i]))
Ejemplo n.º 23
0
    def create_index(self):
        """Create the stock index (hash prefix 'stock:') if absent."""
        client = self.get_indexed_client()

        definition = IndexDefinition(prefix=['stock:'])

        # One text name column plus every configured numeric column.
        index_fields = [TextField("SC_NAME")]
        index_fields.extend(NumericField(name) for name in self.numeric_indexes)

        try:
            # FT.CREATE idx:stock ON HASH PREFIX 1 stock: SCHEMA SC_NAME TEXT ...
            client.create_index(index_fields, definition=definition)
        except redis.exceptions.ResponseError as e:
            # Only "Index already exists" is benign; re-raise anything else.
            if (str(e) != "Index already exists"):
                raise e
    def create(self):
        """Drop any existing index and create the article schema.

        Fixed: bare `except:` narrowed to `except Exception:` so
        SystemExit/KeyboardInterrupt are not swallowed.
        """
        try:
            self.client.drop_index()
        except Exception:
            pass  # index did not exist yet

        self.client.create_index([
            NumericField('WORDCOUNT', sortable=True),
            TextField('BYLINE', no_stem=True, sortable=True),
            TextField('DOCUMENTTYPE', sortable=True),
            TextField('HEADLINE', sortable=True),
            TagField('KEYWORDS', separator=';'),
            NumericField('MULTIMEDIA', sortable=True),
            TextField('NEWDESK', sortable=True),
            NumericField('PRINTPAGE', sortable=True),
            NumericField('PUBDATE', sortable=True),
            TextField('SECTIONNAME', sortable=True),
            TextField('SNIPPET', sortable=True),
            TextField('TYPEOFMATERIAL', sortable=True),
            TextField('WEBURL')
        ])
Ejemplo n.º 25
0
import hashlib 
import gpxpy 
import gpxpy.gpx 
from redisearch import Client, Query, TextField, GeoField, NumericField


# Connect to a local Redis and target the 'attractions' index.
client = Client(
   'attractions',
   host='127.0.0.1',
   password='',
   port=6379
   )

# Schema: weighted title, free-text description, a sortable numeric
# "verified" flag, and a geo point for radius queries.
client.create_index([
   TextField('title', weight=5.0),
   TextField('description'),
   NumericField('verified', sortable=True),
   GeoField('geo'),
])


# Waypoints come from a GPX export of offbeat tourist attractions.
gpx_file = open('All_States_Offbeat_Tourist_Attractions.gpx', 'r', encoding='utf-8')

gpx = gpxpy.parse(gpx_file)

for waypoint in gpx.waypoints:
    # NOTE(review): assumes waypoint.comment is never None — verify upstream.
    if "Verified" in waypoint.comment:
        v = 1
    else:
        v = 0
    # name,longitude,latitude triple (t and v appear unused in this excerpt;
    # presumably consumed by code past the end of this chunk).
    t = "%s,%s,%s" %(waypoint.name, waypoint.longitude, waypoint.latitude)
Ejemplo n.º 26
0
from redisearch import Client, TextField

# Creating a client with a given index name
# Creating a client with a given index name
client = Client('myIndex')

# Creating the index definition and schema: weighted title plus a body field
client.create_index((TextField('title', weight=5.0), TextField('body')))

# Indexing a document (body text is stored exactly as written here)
client.add_document(
    'doc1',
    title='RediSearch',
    body='Redisearch impements a search engine on top of redis')
Ejemplo n.º 27
0
import re
from datetime import datetime
import json
import logging
from random import randint
from time import sleep

# stagger reading and indexing for parallel workers (1-10 s random delay)
sleep(randint(1, 10))

logging.basicConfig(filename='parse.log',level=logging.INFO)

# Target the 'medline' RediSearch index.
client = Client('medline')

try:
        # Single-field schema: the article abstract text.
        client.create_index([TextField('abstract')])

except ResponseError:
        # Index already exists (another parallel worker created it first).
        pass

# NOTE(review): sys.argv[1] is presumably the PubMed XML dump path; `sys`
# is not imported in this excerpt — verify it is imported elsewhere.
with open(sys.argv[1], 'r') as f:
        data=f.read()

# Split the dump into individual <PubmedArticle> records; drop the preamble.
recs = data.split("<PubmedArticle>");
recs = recs[1:]

# Batch documents in chunks of 500 for throughput.
indexer = client.batch_indexer(chunk_size=500)

count = 0

for r in recs:
Ejemplo n.º 28
0
import pandas as pd 
import json
from tqdm import tqdm
from redisearch import Client, TextField, NumericField, Query
from time import sleep
from rediscluster import StrictRedisCluster

# Give the Redis cluster time to come up before connecting.
sleep(15)

nodes = [{'host': "173.17.0.2", 'port': "7000"}]
rc = StrictRedisCluster(startup_nodes=nodes, decode_responses=True)


# The 'week1' index lives on the cluster connection above.
client=Client('week1', conn=rc)
client.create_index([TextField('name'), TextField('surname'), TextField('job')])
dat = pd.read_csv("test.csv")


# Upsert one document per CSV row, keyed by the 'index' column.
for idx, row in tqdm(dat.iterrows()):
	client.add_document(f"{row['index']}", replace=True, partial=True, name = f"{row['name']}", surname = f"{row['surname']}", job = f"{row['job']}")
Ejemplo n.º 29
0
#--------------------------------------------
# Import the whole dirctory to redisearch
# Create the index and the documents
# Change the the dirt to your document's path
#--------------------------------------------

import os
from redisearch import Client, Query, TextField

dirt = "/path/to/the/documents/"  # Change it to your own path

client = Client("BoxGroup", port=6379)  # 6379 as default
client.create_index([TextField('title'), TextField('body')])

# Sort the listing so document ids are assigned deterministically.
filelist = os.listdir(dirt)
filelist = sorted(filelist)
try:
    filelist.remove(".git")
except:
    print("git目录不存在,已跳过")  # message: ".git directory absent, skipped"
filecounter = 0
for filename in filelist:
    openfilename = dirt + filename
    with open(openfilename, "r+") as f:
        data = f.read()
        try:
            client.add_document(filecounter,
                                title=filename,
                                body=data,
                                language="chinese")
        except:
Ejemplo n.º 30
0
def to_search_field(field):
    """Return the redisearch field for *field*; 'tags' becomes a TagField."""
    if field != "tags":
        return TextField(field)
    # Tags are stored comma-separated.
    return TagField("tags", separator=",")