Example #1
    def __init__(self, client):
        self.client = client
        self.file_path = "./index-data/area.csv"
        self.fields = (
            NumericField("index"),
            NumericField("areaId"),
            TextField("areaTitle"),
            TextField("areaBody"),
        )
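Example #2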
    def create(self):
        try:
            self.client.drop_index()
        except Exception:
            # the index may not exist yet; ignore and recreate it below
            pass

        self.client.create_index([
            NumericField('ORDERNUMBER'),
            NumericField('QUANTITYORDERED', sortable=True),
            NumericField('PRICEEACH', sortable=True),
            NumericField('ORDERLINENUMBER'),
            NumericField('SALES', sortable=True),
            TextField('ORDERDATE'),
            TextField('STATUS', sortable=True),
            NumericField('QTR_ID', sortable=True),
            NumericField('MONTH_ID', sortable=True),
            NumericField('YEAR_ID', sortable=True),
            TextField('PRODUCTLINE', sortable=True),
            NumericField('MSRP', sortable=True),
            TextField('PRODUCTCODE', sortable=True),
            TextField('CUSTOMERNAME', sortable=True),
            TextField('PHONE'),
            TextField('ADDRESSLINE1'),
            TextField('ADDRESSLINE2'),
            TextField('CITY', sortable=True),
            TextField('STATE', sortable=True),
            TextField('POSTALCODE', sortable=True),
            TextField('COUNTRY', sortable=True),
            TextField('TERRITORY', sortable=True),
            TextField('CONTACTLASTNAME'),
            TextField('CONTACTFIRSTNAME'),
            TextField('DEALSIZE', sortable=True)
        ])
Example #3
    def clientpush(self):
        client = Client('Checkout')

        client.create_index([
            NumericField('Key'),
            TextField('UsageClass'),
            TextField('CheckoutType'),
            TextField('MaterialType'),
            NumericField('CheckoutYear'),
            NumericField('CheckoutMonth'),
            NumericField('Checkouts'),
            TextField('Title'),
            TextField('Creator'),
            TextField('Subjects'),
            TextField('Publisher'),
            TextField('PublicationYear')
        ])

        db_connection, _ = self.connect()
        cursor = db_connection.cursor()
        cursor.execute('SELECT * FROM customers')
        results = cursor.fetchall()
        i = 0
        for result in results:
            client.add_document('doc%s' % i,
                                Key=result[0],
                                UsageClass=result[1],
                                CheckoutType=result[2],
                                MaterialType=result[3],
                                CheckoutYear=result[4],
                                CheckoutMonth=result[5],
                                Checkouts=result[6],
                                Title=result[7],
                                Creator=result[8],
                                Subjects=result[9],
                                Publisher=result[10],
                                PublicationYear=result[11])
            i += 1
            print(i)
        res = client.search('BOOK')

        print("{}   {}".format(res.total, res.docs[0].Title))
        res1 = client.search("use")
        print(res1)
        q = Query('use').verbatim().no_content().paging(0, 5)
        res1 = client.search(q)
        print(res1)
        cursor.close()
        db_connection.close()
Example #4
    def to_search_field(self):
        if self.type == 'int':
            kwargs = {
                "name": self.name,
                "sortable": self.sortable,
                "no_index": self.no_index
            }
            return NumericField(**kwargs)
        # 'str', 'map', and any other type are indexed as a text field
        kwargs = {
            "name": self.name,
            "weight": self.weight,
            "sortable": self.sortable,
            "no_stem": self.no_stem,
            "no_index": self.no_index,
            "phonetic_matcher": self.phonetic_matcher
        }
        return TextField(**kwargs)
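
Field descriptors like this are usually collected into a schema list and passed to create_index; a minimal sketch, assuming a list of such descriptors and a redisearch Client (both names are illustrative):

# hypothetical: build the index schema from a list of field descriptors
schema = [f.to_search_field() for f in field_descriptors]
client.create_index(schema)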
Example #5
    def start(self, data, index_name):
        status = 1
        self.drop_index()
        self.client = Client(index_name, self.host, self.port)
        status = 2
        schema = [
            NumericField('INDEX'),
            TextField('DATA'),
            TextField('SECTION_TYPE'),
            TextField('DOCID'),
            TextField('PAGE'),
            TextField('GRIDID'),
            TextField("ROWCOL"),
            TextField('BBOX'),
            TextField("PAGE_GRID_SE"),
            TextField('Rowspan'),
            TextField('Colspan')
        ]
        #rsObj.set_schema([NumericField('INDEX'), TextField('DOCID'), TextField('CATEGORY'), TextField('TAXONAME'), TextField('VALUE'), TextField('XML_REF'), TextField('REF_KEY')])
        status = 3
        self.add_indexing_schema(schema)
        status = 4
        self.add_data(data, index_name)
        status = 5
        return [status]
Example #6
    def createHub(self):
        logger.info('Creating the hub in the database {}'.format(self._ts))
        # Store the master modules catalog as an object
        self.dconn.jsonset(
            self._hubkey, Path.rootPath(), {
                'created': str(_toepoch(self._ts)),
                'modules': {},
                'submissions': [],
                'submit_enabled': False
            })

        # Create a RediSearch index for the modules
        # TODO: catch errors
        self.sconn.create_index(
            (TextField('name', sortable=True), TextField('description'),
             NumericField('stargazers_count', sortable=True),
             NumericField('forks_count', sortable=True),
             NumericField('last_modified', sortable=True)),
            stopwords=stopwords)
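
A possible follow-up query against this index, sorted on one of the numeric fields (query text, field and paging values are illustrative):

from redisearch import Query

# 'sconn' stands for the search client used above (assumption)
q = Query('json').sort_by('stargazers_count', asc=False).paging(0, 10)
res = sconn.search(q)
for doc in res.docs:
    print(doc.name, doc.stargazers_count)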
Example #7
def to_field(k, v):
    if v == "BOOL":
        return TextField(k, sortable=True)
    elif v == "NUMERIC":
        return NumericField(k, sortable=True)
    elif v == "TEXT":
        return TextField(k)
    elif v == "TAG":
        return TagField(k)
    else:
        return GeoField(k)
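Example #8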
    def create(self):
        try:
            self.client.drop_index()
        except Exception:
            # the index may not exist yet; ignore and recreate it below
            pass

        self.client.create_index([
            NumericField('WORDCOUNT', sortable=True),
            TextField('BYLINE', no_stem=True, sortable=True),
            TextField('DOCUMENTTYPE', sortable=True),
            TextField('HEADLINE', sortable=True),
            TagField('KEYWORDS', separator=';'),
            NumericField('MULTIMEDIA', sortable=True),
            TextField('NEWDESK', sortable=True),
            NumericField('PRINTPAGE', sortable=True),
            NumericField('PUBDATE', sortable=True),
            TextField('SECTIONNAME', sortable=True),
            TextField('SNIPPET', sortable=True),
            TextField('TYPEOFMATERIAL', sortable=True),
            TextField('WEBURL')
        ])
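
Since KEYWORDS is a TagField, it is matched with curly-brace tag syntax rather than plain full-text search; a small sketch against the index above (tag value and client variable are illustrative):

from redisearch import Query

# hypothetical: documents tagged 'Politics', newest first
q = Query('@KEYWORDS:{Politics}').sort_by('PUBDATE', asc=False).paging(0, 10)
res = client.search(q)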
Example #9
def import_brewery_geo(r, rsclient):

    # create the brewery redisearch index
    ftidxfields = [
        TextField('name', weight=5.0),
        TextField('address'),
        TextField('city'),
        TextField('state'),
        TextField('country'),
        NumericField('id', sortable=True),
        GeoField('location')
    ]
    rsclient.create_index([*ftidxfields])

    with open(brewerygeofile) as geofile:
        geo = csv.reader(geofile)
        for row in geo:
            if geo.line_num == 1:
                # skip the header line
                continue

            # use the brewery id to generate the brewery key added earlier
            brewery_key = "{}:{}".format(brewery, row[1])

            # get all the data from the brewery hash
            binfo = r.hgetall(brewery_key)

            if not (any(binfo)):
                print ("\tERROR: Missing info for {}, skipping geo import".format(brewery_key))
                continue

            # add the brewery document to the index
            ftaddfields = {
                'name': binfo[b'name'].decode(),
                'address': binfo[b'address1'].decode(),
                'city': binfo[b'city'].decode(),
                'state': binfo[b'state'].decode(),
                'country': binfo[b'country'].decode(),
                'id': row[1],
                'location': "{},{}".format(row[3], row[2])
            }
            try:
                rsclient.add_document(
                    "brewery:{}".format(row[1]),
                    score=1.0,
                    replace=True,
                    partial=True,
                    **ftaddfields
                )
            except Exception as e:
                print ("\tERROR: Failed to add document for {}: {}".format(brewery_key, e))
                continue
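
With location indexed as a GeoField, a radius search can be written directly in the query string; for example (coordinates and radius are illustrative):

from redisearch import Query

# hypothetical: breweries within 25 km of a longitude/latitude point
q = Query('@location:[-122.4194 37.7749 25 km]').paging(0, 10)
res = rsclient.search(q)
for doc in res.docs:
    print(doc.name, doc.city)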
Example #10
    def create_user_index(self, users):
        """
        Creates a new user index if not exists
        :param users:
        :return:
        """
        definition = IndexDefinition(prefix=['doc:', 'user:'])

        try:
            self.client.create_index(
                (TextField("first_name"), TextField("last_name"),
                 TextField("email"), NumericField("age"),
                 NumericField("is_employee"),
                 NumericField("user_id", sortable=True)),
                definition=definition)
        except redis.exceptions.ResponseError:
            return False

        indexer = self.client.batch_indexer(chunk_size=len(users))

        for user in users:
            fields = {
                "first_name":
                user.first_name.translate(str.maketrans({"-": r"\-"})),
                "last_name":
                user.last_name.translate(str.maketrans({"-": r"\-"})),
                "email":
                user.email.translate(str.maketrans({"-": r"\-"})),
                "age":
                user.age,
                "user_id":
                user.id,
                "is_employee":
                int(user.is_employee),
            }
            indexer.add_document(f"doc:{user.id}", **fields)
        indexer.commit()

        return True
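
The hyphen escaping above keeps values such as hyphenated names indexed as single tokens, since unescaped punctuation is treated as a token separator. One way the index might be queried afterwards (field values are illustrative):

from redisearch import Query

# 'client' stands for the Client used above (assumption)
# hypothetical: employees named Smith, lowest user_id first
q = Query('@last_name:Smith @is_employee:[1 1]').sort_by('user_id')
res = client.search(q)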
Example #11
    def start(self, data, doc_id, company, project):
        status = 1
        index_name = project + "_DOCUMENT_" + str(doc_id)
        self.drop_index()
        self.client = Client(index_name, self.host, self.port)
        status = 2
        schema = [
            NumericField('INDEX'),
            TextField('DATA'),
            TextField('PAGE'),
            TextField('BBOX')
        ]
        status = 3
        self.add_indexing_schema(schema)
        status = 4
        self.add_data(data, company, doc_id, project)
        status = 5
        return [status]
Example #12
    def create_index(self):
        client = self.get_indexed_client()

        definition = IndexDefinition(prefix=['stock:'])

        index_fields = [
            TextField("SC_NAME"),
        ]
        for index in self.numeric_indexes:
            index_fields.append(NumericField(index))

        try:
            # FT.CREATE idx:stock ON HASH PREFIX 1 stock: SCHEMA SC_NAME TEXT ...
            client.create_index(index_fields, definition=definition)
        except redis.exceptions.ResponseError as e:
            # FT.DROPINDEX idx:stock DD
            if (str(e) != "Index already exists"):
                raise e
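
A numeric range query against the resulting index could look like the sketch below (the column name and range are illustrative, since numeric_indexes is defined elsewhere):

from redisearch import Query

# hypothetical: stock documents whose CLOSE value falls in a range
q = Query('@CLOSE:[100 200]').paging(0, 10)
res = client.search(q)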
Example #13
def ftadd_beers(r, rsclient):

    # create beer index
    ftidxfields = [
        TextField('name', weight=5.0),
        TextField('brewery'),
        NumericField('breweryid', sortable=True),
        TextField('category'),
        NumericField('categoryid'),
        TextField('style'),
        NumericField('styleid'),
        TextField('description'),
        NumericField('abv', sortable=True),
        NumericField('ibu', sortable=True),
        TagField('favorite')
    ]
    rsclient.create_index([*ftidxfields])

    header = []
    dontadd = 0
    with open(beerfile) as csvfile:
        beers = csv.reader(csvfile)
        for row in beers:
            docid = ''
            docscore = 1.0
            ftaddfields = {}

            if beers.line_num == 1:
                header = row
                continue

            for idx, field in enumerate(row):
                if idx == 0:
                    docid = "{}:{}".format(beer, field)
                    continue

                # idx 1 is brewery name
                if idx == 1:

                    if field == "":
                        # something is wrong with the csv, skip this line.
                        print("\tEJECTING: {}".format(row))
                        dontadd = 1
                        break
                    bkey = "{}:{}".format(brewery, field)
                    ftaddfields['brewery'] = r.hget(bkey, 'name')
                    ftaddfields['breweryid'] = field

                # idx 2 is beer name
                elif idx == 2:

                    ftaddfields['name'] = field

                # idx 3 is category ID
                elif idx == 3:

                    catname = 'None'
                    if int(field) != -1:
                        # get the category key and hget the name of the category
                        ckey = "{}:{}".format(category, field)
                        catname = r.hget(ckey, 'cat_name')

                    ftaddfields['category'] = catname
                    ftaddfields['categoryid'] = field

                # idx 4 is style ID
                elif idx == 4:

                    stylename = 'None'

                    if int(field) != -1:
                        skey = "{}:{}".format(style, field)
                        stylename = r.hget(skey, 'style_name')

                    ftaddfields['style'] = stylename
                    ftaddfields['styleid'] = field

                # idx 5 is ABV
                elif idx == 5:

                    ftaddfields['abv'] = field

                    # update the document score based on ABV
                    docscore = get_beer_doc_score(field)

                # idx 6 is IBU
                elif idx == 6:

                    ftaddfields['ibu'] = field

            if dontadd:
                dontadd = 0
                continue

            # add beer document
            rsclient.add_document(docid, score=docscore, **ftaddfields)
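
Because abv is a sortable numeric field and favorite is a tag field, range and tag filters can be combined in one query string; for instance (search term and values are illustrative):

from redisearch import Query

# hypothetical: stouts between 6 and 9 ABV that were tagged as favorites
q = Query('stout @abv:[6 9] @favorite:{yes}').sort_by('abv', asc=False)
res = rsclient.search(q)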
Example #14
import gpxpy 
import gpxpy.gpx 
from redisearch import Client, Query, TextField, GeoField, NumericField


client = Client(
   'attractions',
   host='127.0.0.1',
   password='',
   port=6379
   )

client.create_index([
   TextField('title', weight=5.0),
   TextField('description'),
   NumericField('verified', sortable=True),
   GeoField('geo'),
])


gpx_file = open('All_States_Offbeat_Tourist_Attractions.gpx', 'r', encoding='utf-8')

gpx = gpxpy.parse(gpx_file)

for waypoint in gpx.waypoints:
    if "Verified" in waypoint.comment:
        v = 1
    else:
        v = 0
    t = "%s,%s,%s" %(waypoint.name, waypoint.longitude, waypoint.latitude)
    # the original example is cut off here; a plausible completion based on the
    # schema created above (the document key and field mapping are assumptions):
    client.add_document(
        t,
        title=waypoint.name,
        description=waypoint.description,
        verified=v,
        geo="%s,%s" % (waypoint.longitude, waypoint.latitude)
    )
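Example #15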
def load_data(redis_server, redis_port, redis_password):
   load_client = Client(
      'fortune500-v1',
      host=redis_server,
      password=redis_password,
      port=redis_port
   )
   load_ac = AutoCompleter('ac', conn=load_client.redis)
   
   definition = IndexDefinition(
           prefix=['fortune500:'],
           language='English',
           score_field='title',
           score=0.5
           )
   load_client.create_index(
           (
               TextField("title", weight=5.0),
               TextField('website'),
               TextField('company'),
               NumericField('employees', sortable=True),
               TextField('industry', sortable=True),
               TextField('sector', sortable=True),
               TextField('hqcity', sortable=True),
               TextField('hqstate', sortable=True),
               TextField('ceo'),
               TextField('ceoTitle'),
               NumericField('rank', sortable=True),
               NumericField('assets', sortable=True),
               NumericField('revenues', sortable=True),
               NumericField('profits', sortable=True),
               NumericField('equity', sortable=True),
               TagField('tags'),
               TextField('ticker')
               ),        
       definition=definition)

   with open('./fortune500.csv', encoding='utf-8') as csv_file:
      csv_reader = csv.reader(csv_file, delimiter=',')
      line_count = 0
      for row in csv_reader:
         if line_count > 0:
            load_ac.add_suggestions(Suggestion(row[1].replace('"', ''),  1.0))
            load_client.redis.hset(
                    "fortune500:%s" %(row[1].replace(" ", '')),
                    mapping = {
                        'title': row[1],
                        'company': row[1],
                        'rank': row[0],
                        'website': row[2],
                        'employees': row[3],
                        'sector': row[4],
                        'tags': ",".join(row[4].replace('&', '').replace(',', '').replace('  ', ' ').split()).lower(),
                        'industry': row[5],
                        'hqcity': row[8],
                        'hqstate': row[9],
                        'ceo': row[12],
                        'ceoTitle': row[13],
                        'ticker': row[15],
                        'revenues': row[17],
                        'profits': row[19],
                        'assets': row[21],
                        'equity': row[22]
                    })
         line_count += 1
   # Finally Create the alias
   load_client.aliasadd("fortune500")
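
The suggestions loaded into the 'ac' key can later be read back through the AutoCompleter; a short sketch (the prefix is illustrative, and the connection is assumed to be the same one used above):

from redisearch import AutoCompleter

ac = AutoCompleter('ac', conn=load_client.redis)
# hypothetical prefix lookup with fuzzy matching
for sugg in ac.get_suggestions('Wal', fuzzy=True, num=5):
    print(sugg.string)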
Example #16
                'error_message': doc.errorMessage,
                'stack_trace': doc.stackTrace,
                'numberRange': doc.numberRange
            }
            result.append(value_dict)
        print(res)
        return result


document = [
    TextField('clientIp', weight=5.0),
    TextField('service', weight=1.0),
    TextField('errorMessage', weight=10.0),
    TextField('stackTrace'),
    TextField('dateTime', weight=10.0),
    NumericField('numberRange')
]

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Build the Search Index for RediSearch")
    parser.add_argument(
        '-d',
        '--delete',
        dest='del_index',
        help=
        "Deletes the former index. Must only be used if there is an index already",
        required=False,
        const=True,
        default=False,
        nargs='?')
Example #17
    def __init__(self, db, prefix: str = None):
        """
            # rBaseDocument
            A RediSearch document but without imput validation

            ## Param
            conn - Redis connection
            prefix - name of the document i.e. PERSONA or None, in this case we take the name of the class

            ## Remarks
            After the index creation (first time) the index definition is no longer synced with 
            the database. You must maintain manually the changes on Redis or simply delete the
            index with: 
            
            ```> FT.DROPINDEX idx:movie```

            And let redis to recreate it. This is usually fast but can't be an option in a production environment.
        """
        self.db = db
        if not prefix:
            prefix = type(self).__name__.upper()
        self.prefix = prefix.upper()
        self.idx = Client(f"idx{self.db.delim}{self.prefix}", conn=db.r)

        # build index list for RediSearch and columns for an html table of the data
        index = []
        self.columns = []  # columns to appear in an auto-generated html table
        self.dependant = []  # fields that depend on a foreign key
        self.index = []  # list of indexed field names
        self.uniques = []  # fields whose values must be unique
        logger.debug(f"Members of document type {self.prefix}")
        for field in self.Definition():
            logger.debug(f"{field.name}({field.type}): {field.render_kw}")
            if field.render_kw:
                # include field in index
                if field.render_kw.get('indexed', False):
                    self.index.append(
                        field.name)  # append to index field names list
                    if field.type in ('DecimalField', 'FloatField',
                                      'IntegerField'):
                        index.append(NumericField(field.name, sortable=True))
                    else:
                        index.append(TextField(field.name, sortable=True))
                # include field in html table columns
                if field.render_kw.get('on_table', False):
                    self.columns.append(field.name)
                # the field has unique values
                if field.render_kw.get('unique', False):
                    self.uniques.append(field.name)  # append to uniques
                    if field.name not in self.index:  # append to index list
                        self.index.append(field.name)
                        if field.type in ('DecimalField', 'FloatField',
                                          'IntegerField'):
                            index.append(
                                NumericField(field.name, sortable=True))
                        else:
                            index.append(TextField(field.name, sortable=True))

        # build index
        try:
            self.idx.create_index(
                index,
                definition=IndexDefinition(
                    prefix=[f'{self.prefix}{self.db.delim}']))
        except Exception:
            # ignore errors here, typically because the index already exists
            pass
Example #18
import sys
import json

from redisearch import NumericField, TextField

import TAS_Redisearch


class TAS_Import():
    def __init__(self, index_name, host="localhost", port=6381, db=0):
        pass

    def add_data(self, data, rsObj):
        for i, rr in enumerate(data):
            index = i + 1
            name, age, location = rr
            # add_document needs a document id as its first argument
            # ("doc%s" % index is an illustrative key)
            rsObj.client.add_document("doc%s" % index,
                                      INDEX=index,
                                      NAME=name,
                                      AGE=age,
                                      LOCATION=location)


if __name__ == "__main__":
    obj = TAS_Redisearch.TAS_Redisearch("USERS", 'localhost', '6381')
    obj.drop_index()
    res = obj.set_schema([
        NumericField('INDEX'),
        TextField('NAME'),
        TextField('AGE'),
        TextField('LOCATION')
    ])
    f = open("input.txt", "r")
    dd = json.loads(f.read())
    data_red = obj.add_data(dd)