コード例 #1
0
def test():
    """Smoke-test the RediSearch client: build an index, add one document,
    and run a few searches.

    Ported to Python 3: the original used Python 2 ``print`` statements,
    which are syntax errors under Python 3.
    """
    # Creating a client with a given index name
    client = Client('myIndex')

    # Recreate the index definition and schema from scratch
    client.drop_index()
    client.create_index([TextField('title', weight=5.0), TextField('body')])

    # Indexing a document
    client.add_document(
        'doc1',
        title='RediSearch',
        body='Redisearch implements a search engine on top of redis')

    # Simple search
    res = client.search("search engine")

    # The result has the total number of results, and a list of documents
    print(res.total)  # "1"
    print(res.docs[0])

    # Searching with complex parameters:
    q = Query("search engine").verbatim().no_content().paging(0, 5)
    res = client.search(q)
コード例 #2
0
class RediSearchClient(object):
    """Builds and queries a RediSearch index from a line-document file."""

    def __init__(self, index_name):
        self.client = Client(index_name)
        self.index_name = index_name

    def build_index(self, line_doc_path, n_docs):
        """Index up to *n_docs* documents from the line-doc file.

        The existing index (if any) is dropped first.
        """
        line_pool = LineDocPool(line_doc_path)

        # Best-effort drop; narrowed from a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        try:
            self.client.drop_index()
        except Exception:
            pass

        self.client.create_index([TextField('title'), TextField('url'), TextField('body')])

        for i, d in enumerate(line_pool.doc_iterator()):
            self.client.add_document(i, nosave=True, title=d['doctitle'],
                                     url=d['url'], body=d['body'])

            if i + 1 == n_docs:
                break

            if i % 1000 == 0:
                # Python 3 print function (original used a py2 print statement).
                print("{}/{} building index".format(i, n_docs))

    def search(self, query):
        """Run a verbatim search, returning the first 5 results."""
        q = Query(query).paging(0, 5).verbatim()
        res = self.client.search(q)
        return res
コード例 #3
0
def create_website_items_index():
    """Create the website-items search index and rebuild dependent data."""

    client = Client(make_key(WEBSITE_ITEM_INDEX), conn=frappe.cache())

    # Drop any stale index first; a ResponseError most likely just means the
    # index does not exist yet, so it is safe to ignore and carry on.
    try:
        client.drop_index()
    except ResponseError:
        pass
    except Exception:
        raise_redisearch_error()

    definition = IndexDefinition([make_key(WEBSITE_ITEM_KEY_PREFIX)])

    # Fields configured in e-commerce settings (comma-separated string).
    configured = frappe.db.get_single_value("E Commerce Settings",
                                            "search_index_fields")
    field_names = configured.split(",") if configured else []

    # web_item_name always leads the schema, so drop a duplicate entry.
    if "web_item_name" in field_names:
        field_names.remove("web_item_name")

    schema = [TextField("web_item_name", sortable=True)]
    schema.extend(map(to_search_field, field_names))

    client.create_index(schema, definition=definition)

    reindex_all_web_items()
    define_autocomplete_dictionary()
コード例 #4
0
class SearchDemo:
    """Demo helper that (re)creates a RediSearch index for article metadata."""

    def __init__(self, args):
        # args: parsed CLI namespace providing index name, host and port.
        self.index = args.index
        self.client = Client(self.index, host=args.host, port=args.port)

    def create(self):
        """Drop any existing index, then create the schema."""
        # Best-effort drop; narrowed from a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        try:
            self.client.drop_index()
        except Exception:
            pass

        self.client.create_index([
            NumericField('WORDCOUNT', sortable=True),
            TextField('BYLINE', no_stem=True, sortable=True),
            TextField('DOCUMENTTYPE', sortable=True),
            TextField('HEADLINE', sortable=True),
            TagField('KEYWORDS', separator=';'),
            NumericField('MULTIMEDIA', sortable=True),
            TextField('NEWDESK', sortable=True),
            NumericField('PRINTPAGE', sortable=True),
            NumericField('PUBDATE', sortable=True),
            TextField('SECTIONNAME', sortable=True),
            TextField('SNIPPET', sortable=True),
            TextField('TYPEOFMATERIAL', sortable=True),
            TextField('WEBURL')
        ])
コード例 #5
0
class TAS_Import():
    """Imports simple name/age/location records into a RediSearch index."""

    def __init__(self, index_name, host="172.16.20.7", port=6382, db=0):
        self.client = Client(index_name, host, port)
        self.host = host
        self.port = port
        self.index_name = index_name
        self.redis = Redis()

    def add_indexing(self, schema):
        """Create the index.

        NOTE(review): the positional flags (False, False, []) presumably
        map to no_term_offsets/no_field_flags/stopwords in the installed
        redisearch-py version -- confirm against its API.
        """
        self.client.create_index(schema, False, False, [])
        return ["Done"]

    def add_data(self, data):
        """Add one document per record; keys are 1-based positions."""
        for i, rr in enumerate(data):
            index = i + 1
            # Python 3 print function (original used a py2 print statement).
            print(rr)
            name, age, location = rr['name'], rr['age'], rr['location']
            self.client.add_document(index,
                                     NAME=name,
                                     AGE=age,
                                     LOCATION=location)
        return ["Done"]

    def drop_index(self):
        """Best-effort drop; narrowed from a bare ``except``."""
        try:
            self.client.drop_index()
        except Exception:
            pass
コード例 #6
0
def create_website_items_index():
    """Drop and recreate the website-items search index, then reindex."""

    client = Client(make_key(WEBSITE_ITEM_INDEX), conn=frappe.cache())

    # Best-effort drop: the index may not exist yet, so ignore failures.
    try:
        client.drop_index()
    except Exception:
        pass

    index_definition = IndexDefinition([make_key(WEBSITE_ITEM_KEY_PREFIX)])

    # Comma-separated field list configured in e-commerce settings.
    configured = frappe.db.get_single_value('E Commerce Settings',
                                            'search_index_fields')
    field_names = configured.split(',') if configured else []

    # web_item_name is always indexed first; avoid listing it twice.
    if 'web_item_name' in field_names:
        field_names.remove('web_item_name')

    extra_fields = list(map(to_search_field, field_names))

    client.create_index(
        [TextField("web_item_name", sortable=True)] + extra_fields,
        definition=index_definition,
    )

    reindex_all_web_items()
    define_autocomplete_dictionary()
コード例 #7
0
def build_ipa_index():
    """Download the Italian IPA open-data files and (re)build `IPAIndex`.

    Pulls `amministrazioni.txt` and `ou.txt` from indicepa.gov.it, drops any
    stale index, recreates the schema and feeds both data sets.
    """
    start_time = time.time()
    rc = redis.Redis(password=os.environ.get('REDIS_PASSWORD', ''))
    rs_client = Client('IPAIndex', conn=rc)

    print(
        'Getting file `amministrazioni.txt` from https://www.indicepa.gov.it',
        flush=True)
    ipa_index_amm_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=amministrazioni.txt'
    ipa_index_amm = pd.read_csv(ipa_index_amm_url, sep='\t', dtype=str)

    print('Getting file `ou.txt` from https://www.indicepa.gov.it', flush=True)
    ipa_index_ou_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=ou.txt'
    ipa_index_ou = pd.read_csv(ipa_index_ou_url,
                               sep='\t',
                               na_values=['da_indicare', '*****@*****.**'],
                               dtype=str)
    # Keep only the "digital transition office" organisational units.
    ipa_index_ou = ipa_index_ou.loc[
        lambda df: df['cod_ou'] == 'Ufficio_Transizione_Digitale']

    # Best-effort drop; narrowed from a bare ``except`` so that
    # KeyboardInterrupt/SystemExit are not swallowed.
    try:
        rs_client.drop_index()
    except Exception:
        pass  # Index already dropped

    rs_client.create_index([
        TextField('ipa_code', weight=2.0),
        TextField('name', weight=2.0, sortable=True),
        TextField('site'),
        TextField('pec'),
        TextField('city', weight=1.4),
        TextField('county'),
        TextField('region'),
        TagField('type'),
        TextField('rtd_name'),
        TextField('rtd_pec'),
        TextField('rtd_mail'),
    ])
    print('Created index `IPAIndex`', flush=True)

    print('Feeding `IPAIndex` with data from `amministrazioni.txt`',
          flush=True)
    for index, row in ipa_index_amm.iterrows():
        rs_client.add_document(row['cod_amm'],
                               language='italian',
                               replace=True,
                               **get_ipa_amm_item(row))

    print('Feeding `IPAIndex` with data from `ou.txt`', flush=True)
    for index, row in ipa_index_ou.iterrows():
        # partial=True merges RTD fields into the documents created above.
        rs_client.add_document(row['cod_amm'],
                               partial=True,
                               **get_ipa_rtd_item(row))

    finish_time = time.time()
    print('`IPAIndex` build completed in {0} seconds'.format(
        round(finish_time - start_time, 2)),
          flush=True)
コード例 #8
0
class TAS_Import():
    """Imports extracted (DATA, PAGE, BBOX) rows into a per-document index."""

    def __init__(self, index_name, host=ip, port=port, db=db):
        # NOTE(review): the defaults reference module-level ip/port/db
        # globals defined elsewhere in this file.
        self.client = Client(index_name, host, port)
        self.host = host
        self.port = port

    def add_indexing_schema(self, schema):
        """Create the index from *schema*; returns ["Done"] for the caller.

        NOTE(review): the positional flags (False, False, []) presumably map
        to no_term_offsets/no_field_flags/stopwords -- confirm against the
        installed redisearch-py version.
        """
        self.client.create_index(schema, False, False, [])
        return ["Done"]

    def add_data(self, rdata, company, doc_id, project):
        """Index each row under a composite key built from its position."""
        for i, rr in enumerate(rdata):
            index = doc_id + company + "CMDIC" + str(i + 1) + project
            l1, l2, l3 = rr
            l1 = config_obj.StringEscape(l1)
            self.client.add_document(index, DATA=l1, PAGE=l2, BBOX=l3)
        return ["Done"]

    def drop_index(self):
        """Best-effort drop; the exception value was never used, so do not bind it."""
        try:
            self.client.drop_index()
        except Exception:
            pass

    def start(self, data, doc_id, company, project):
        """Recreate the document index and load *data* into it."""
        status = 1
        index_name = project + "_DOCUMENT_" + str(doc_id)
        self.drop_index()
        self.client = Client(index_name, self.host, self.port)
        status = 2
        schema = [
            NumericField('INDEX'),
            TextField('DATA'),
            TextField('PAGE'),
            TextField('BBOX')
        ]
        status = 3
        self.add_indexing_schema(schema)
        status = 4
        self.add_data(data, company, doc_id, project)
        status = 5
        return [status]
class SearchDemo:
    """Demo helper that (re)creates a RediSearch index for sales-order data."""

    def __init__(self, args):
        # args: parsed CLI namespace providing index name, host and port.
        self.index = args.index
        self.client = Client(self.index, host=args.host, port=args.port)

    def create(self):
        """Drop any existing index, then create the schema."""
        # Best-effort drop; narrowed from a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        try:
            self.client.drop_index()
        except Exception:
            pass

        self.client.create_index([
            NumericField('ORDERNUMBER'),
            NumericField('QUANTITYORDERED', sortable=True),
            NumericField('PRICEEACH', sortable=True),
            NumericField('ORDERLINENUMBER'),
            NumericField('SALES', sortable=True),
            TextField('ORDERDATE'),
            TextField('STATUS', sortable=True),
            NumericField('QTR_ID', sortable=True),
            NumericField('MONTH_ID', sortable=True),
            NumericField('YEAR_ID', sortable=True),
            TextField('PRODUCTLINE', sortable=True),
            NumericField('MSRP', sortable=True),
            TextField('PRODUCTCODE', sortable=True),
            TextField('CUSTOMERNAME', sortable=True),
            TextField('PHONE'),
            TextField('ADDRESSLINE1'),
            TextField('ADDRESSLINE2'),
            TextField('CITY', sortable=True),
            TextField('STATE', sortable=True),
            TextField('POSTALCODE', sortable=True),
            TextField('COUNTRY', sortable=True),
            TextField('TERRITORY', sortable=True),
            TextField('CONTACTLASTNAME'),
            TextField('CONTACTFIRSTNAME'),
            TextField('DEALSIZE', sortable=True)
        ])
コード例 #10
0
ファイル: views.py プロジェクト: abinash-kumar/pythod
def refresh_search_keys(request):
    """Rebuild the product search index and autocompleter (staff only).

    Returns ``{'success': True}`` for authenticated staff users, otherwise
    ``{'success': False}``.
    """
    if request.user.is_authenticated() and request.user.is_staff:
        client = Client('productIndex')
        # The original also read num_docs and autocompleter lengths into
        # locals that were never used; those dead reads are removed.
        delete_status = client.drop_index()
        if delete_status == 'OK':
            create_product_search_index()
        create_product_autocompleter()
        return JsonResponse({'success': True})
    return JsonResponse({'success': False})
コード例 #11
0
def open_redis():
    """Spin up a RediSearch docker container, download the NVD CVE/CPE feeds,
    and index them into Redis.

    Side effects: creates ./nvd_data_feeds/ and a temporary ./downloads/
    directory, starts a docker container named 'amadeus', and exits the
    process if the index already exists.
    """
    if not os.path.isdir('./nvd_data_feeds/'):
        os.mkdir('./nvd_data_feeds/')

    print('Creating the docker container with redislabs/redisearch\n')
    Popen([
        'docker', 'run', '--rm', '--name', 'amadeus', '-p', '6379:6379',
        'redislabs/redisearch:latest'
    ])
    sleep(6)  # give the container time to start accepting connections

    urls = [
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2021.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2020.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2019.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2018.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2017.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2016.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2015.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2014.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2013.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2012.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2011.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2010.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2009.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2008.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2007.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2006.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2005.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2004.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2003.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2002.json.zip',
        'https://nvd.nist.gov/feeds/json/cpematch/1.0/nvdcpematch-1.0.json.zip'
    ]

    print('\nDownloading and unziping json feeds')
    if not os.path.isdir('./downloads/'):
        os.mkdir('./downloads/')
    tam = len(urls)
    dl = 0
    for url in urls:
        name = url.split('/')[-1]
        response = get(url)
        # BUGFIX: close the download file promptly instead of leaking the
        # handle (the original used open(...).write(...)).
        with open('./downloads/' + name, 'wb') as feed_file:
            feed_file.write(response.content)
        with ZipFile('./downloads/' + name, 'r') as zip_ref:
            zip_ref.extractall('./nvd_data_feeds/')
        dl += 1
        prog = dl / tam
        done = int(50 * prog)
        stdout.write('\r[%s%s%s]%s' %
                     ('Progres > ', '=' * (done - 1) + '>', ' ' *
                      (50 - done), str(round(prog * 100)) + '%'))
    rmtree('./downloads/')
    print('\n')

    print('Start processing CVE feeds')

    # Create a normal redis connection
    conn = redis.Redis('localhost')

    # Creating a client with a given index name
    client = Client('cveIndex')

    # IndexDefinition is available for RediSearch 2.0+
    definition = IndexDefinition(prefix=['cve:'])

    # Creating the index definition and schema
    try:
        client.create_index((TextField('id'), TextField('description'),
                             TextField('configurations')),
                            definition=definition)
    except Exception:
        # Index already exists. Delete and recreate
        client.drop_index()
        print('Index already exists\nDropping\nDelete keys and try again')
        exit()

    def process_CVE_file(file):
        # Parse one yearly CVE feed and index every item as a 'cve:' hash.
        with open(file, 'r', encoding='utf8') as f:
            json = ujson.decode(f.read())
            cve_items = json['CVE_Items']
            for cve_item in cve_items:
                cve_id = cve_item['cve']['CVE_data_meta']['ID']
                cve_desc = cve_item['cve']['description']['description_data'][
                    0]['value']
                cve_configurations = str(cve_item['configurations']['nodes'])
                # Sanitizing special characters to prevent them from being tokenized away
                cve_desc_sanitized = cve_desc.replace(':', 'cc11').replace(
                    '.', 'pp22').replace('*', 'ss33')
                cve_configurations_sanitized = cve_configurations.replace(
                    ':', 'cc11').replace('.', 'pp22').replace('*', 'ss33')
                # Indexing a document for RediSearch 2.0+
                client.redis.hset('cve:' + cve_id,
                                  mapping={
                                      'id':
                                      cve_id,
                                      'description':
                                      cve_desc_sanitized,
                                      'configurations':
                                      cve_configurations_sanitized
                                  })
            print('Processed ' + file)

    with ThreadPoolExecutor(max_workers=20) as pool:
        futures = []
        # BUGFIX: the 2021 feed is downloaded above but the original loop
        # stopped at 2020 (range(2002, 2021)) and never processed it.
        for i in range(2002, 2022):
            future = pool.submit(
                process_CVE_file,
                './nvd_data_feeds/nvdcve-1.1-{0}.json'.format(i))
            futures.append(future)
        json_list = [x.result() for x in as_completed(futures)]

    print('Done processing CVE feeds\nProcessing NVD CPE match feed')

    with open('./nvd_data_feeds/nvdcpematch-1.0.json', 'r',
              encoding='utf8') as f:
        json = ujson.decode(f.read())
        matches = json['matches']
        for match in matches:
            rootUri = match['cpe23Uri']
            keyName = rootUri
            # Version bounds become part of the cache key when present.
            if 'versionStartIncluding' in match:
                keyName += ';;versionStartIncluding=' + match[
                    'versionStartIncluding']
            if 'versionStartExcluding' in match:
                keyName += ';;versionStartExcluding=' + match[
                    'versionStartExcluding']
            if 'versionEndIncluding' in match:
                keyName += ';;versionEndIncluding=' + match[
                    'versionEndIncluding']
            if 'versionEndExcluding' in match:
                keyName += ';;versionEndExcluding=' + match[
                    'versionEndExcluding']
            if len(match['cpe_name']) > 0:
                # if CPE list is empty no need to include it in cache
                valueString = ';;'.join(x['cpe23Uri']
                                        for x in match['cpe_name'])
                conn.set(keyName, valueString)

    print('\nAMADEUS is already launched!')
コード例 #12
0
ファイル: watcher.py プロジェクト: moluwole/WatchTower
 def delete_index(cls):
     """Drop the 'tower' RediSearch index on the Redis host named by REDIS_HOST.

     NOTE(review): takes ``cls`` but never uses it -- presumably decorated
     with @classmethod outside this view; confirm against the full class.
     """
     client = Client("tower", port=6379, host=os.getenv('REDIS_HOST'))
     client.drop_index()
コード例 #13
0
# Verify a RediSearch-enabled server is reachable before doing anything else.
# An "Unknown Index name" error is fine: the server is up, the index is new.
try:
    client.info()
except Exception as e:
    if e.args[0] != "Unknown Index name":
        print("You must be running a redis server with the redisearch module installed")
        exit()

# IndexDefinition is available for RediSearch 2.0+
definition = IndexDefinition(prefix=['cve:'])

# Creating the index definition and schema
try:
    client.create_index((TextField("id"), TextField("description"), TextField("configurations")), definition=definition)
except Exception:
    # Index already exists. Delete and recreate.
    # (Narrowed from a bare ``except`` so system-exiting exceptions propagate.)
    client.drop_index()
    print("Index already exists. Dropping. Delete keys and try again.")
    exit()

def process_CVE_file(file):
    with open(file, 'r', encoding="utf8") as f:
        json = ujson.decode(f.read())
        cve_items = json['CVE_Items']
        for cve_item in cve_items:
            cve_id = cve_item['cve']['CVE_data_meta']['ID']
            cve_desc = cve_item['cve']['description']['description_data'][0]['value']
            cve_configurations = str(cve_item['configurations']['nodes'])
            # Sanitizing special characters to prevent them from being tokenized away
            cve_desc_sanitized = cve_desc.replace(':','cc11').replace('.','pp22').replace('*','ss33')
            cve_configurations_sanitized = cve_configurations.replace(':','cc11').replace('.','pp22').replace('*','ss33')
            # Indexing a document for RediSearch 2.0+
コード例 #14
0
ファイル: index.py プロジェクト: mrkarezina/awesome-search
class Indexer:
    """
    Scrapes repos found on awesome lists and inserts repo data into Redis.

    **urls**: List of Github awesome-list URLs.
    """

    def __init__(self, urls: List[str], max_per_list: int = MAX_RES_PER_LIST):
        self.urls = urls
        self.client = Client(INDEX_NAME,
                             host=REDIS_HOST,
                             port=REDIS_PORT,
                             password=REDIS_PASSWORD)
        self.keys = Keys(KEY_PREFIX)
        self.max = max_per_list

    def create_index_definition(self, drop_existing=False):
        """
        Create an index definition. Do nothing if it already exists.
        """
        if drop_existing:
            self.client.drop_index()

        definition = IndexDefinition(prefix=[self.keys.pre("resource:")])
        schema = [
            TextField('body', weight=1),
            TextField('repo_name', weight=1.5),
            TextField('language', weight=1),
            TextField('lists'),
        ]
        try:
            self.client.create_index(schema, definition=definition)
        except ResponseError:
            print("Index already exists.")

    def index(self):
        """
        Insert scraped resources into Redis.
        """
        for url in self.urls:
            parent = RepoScraper(url)
            print(f"Creating index for {parent.repo}")

            self.client.redis.sadd(self.keys.awesome_list_list(), parent.repo)
            resources = AwesomeScrape(url).scrape(max_num=self.max)

            # First pass: record, per repo, the set of awesome lists it
            # appears on (needed to fill the 'lists' field below).
            for resource in resources:
                try:
                    repo_lists_key = self.keys.github_repo_lists(
                        resource['owner']['login'], resource['name'])
                    self.client.redis.sadd(repo_lists_key, parent.repo)
                except (KeyError, DataError):
                    pass

            # Second pass: store each repo as a hash for the search index.
            for resource in resources:
                try:
                    language = resource['language']
                    if language is not None:
                        self.client.redis.sadd(self.keys.language_list(),
                                               language)
                    else:
                        language = ''

                    lists = self.client.redis.smembers(
                        self.keys.github_repo_lists(resource['owner']['login'],
                                                    resource['name']))

                    repo_key = self.keys.github_repo(resource['owner']['login'],
                                                     resource['name'])
                    self.client.redis.hset(repo_key,
                                           mapping={
                                               'repo_name': resource['name'],
                                               'lists': ", ".join(lists),
                                               'body': resource['description'],
                                               'stargazers_count':
                                               resource['stargazers_count'],
                                               'language': language,
                                               'svn_url': resource['svn_url'],
                                           })

                except (KeyError, DataError):
                    print(f"Resource missing data: f{resource}")
コード例 #15
0
class TAS_Redisearch():
    """Convenience wrapper around a RediSearch index.

    Ported to Python 3: the original used Python 2 ``print`` statements,
    several of which were malformed (``print sys.stderr, e`` and bare
    ``sys.stderr, "..."`` expressions) and therefore printed nothing useful;
    all error reporting now goes through ``print(..., file=sys.stderr)``.
    """

    def __init__(self, table_name, host="localhost", port=6381):
        try:
            self.client = Client(table_name, host, port)
            self.host = host
            self.port = port
            self.table_name = table_name
            self.redis = Redis()
            self.LIMIT = 10  # default number of results per search
        except Exception as e:
            print('yyy')  # NOTE(review): leftover debug output, kept as-is
            print("TAS_Redisearch Error inside Constructor Index:'", table_name,
                  "' HOST:'", host, "' PORT:'", port, "'\n", file=sys.stderr)
            print(e, file=sys.stderr)

    # Will set the no of results to show
    def set_result_limit(self, num):
        self.LIMIT = num
        return

    # Defines the schema for Redisearch
    def set_schema(self, schema):
        try:
            # The trailing empty list ensures default stopwords are not ignored.
            return self.client.create_index(schema, False, False, [])
        except Exception as e:
            print("TAS_Redisearch Error inside set_schema Index:'",
                  self.table_name, "' HOST:'", self.host, "' PORT:'",
                  self.port, "'\n", file=sys.stderr)
            print(e, file=sys.stderr)

    # Deletes index(table)
    def drop_index(self):
        try:
            return self.client.drop_index()
        except Exception as e:
            print("TAS_Redisearch Error inside drop_index Index:'",
                  self.table_name, "' HOST:'", self.host, "' PORT:'",
                  self.port, "'\n", file=sys.stderr)
            print(e, file=sys.stderr)

    # Deletes a document(row) by document_index
    def delete_document(self, document_index):
        try:
            return self.client.delete_document(document_index)
        except Exception as e:
            print("TAS_Redisearch Error inside delete_document Index:'",
                  self.table_name, "' HOST:'", self.host, "' PORT:'",
                  self.port, "'\n", file=sys.stderr)
            print(e, file=sys.stderr)

    #############################################SEARCHES BELOW#######################################

    # Uses python libraries
    def py_search(self, query, result_limit=-1):
        if result_limit == -1:
            result_limit = self.LIMIT
        try:
            return self.client.search(Query(query).paging(0, result_limit))
        except Exception as e:
            # BUGFIX: the original 'print sys.stderr, e' printed a tuple to
            # stdout instead of writing the error to stderr.
            print("TAS_Redisearch Error inside py_search Index:'",
                  self.table_name, "' HOST:'", self.host, "' PORT:'",
                  self.port, "'\n", file=sys.stderr)
            print(e, file=sys.stderr)

    # Search with default parameters [will return dictionary]
    def generic_search(self, search_text, result_limit=-1):
        if result_limit == -1:
            result_limit = self.LIMIT
        query_string = ("FT.SEARCH " + self.table_name + " " + search_text +
                        " LIMIT 0 " + str(result_limit))
        try:
            res = self.redis.execute_command(query_string)
            return Result(res, True)
        except Exception as e:
            print("TAS_Redisearch Error inside generic_search Index:'",
                  self.table_name, "' HOST:'", self.host, "' PORT:'",
                  self.port, "'\n", file=sys.stderr)
            print(e, file=sys.stderr)

    def free_exact_search(self, key, result_limit=-1):
        """Return docs where any field value equals *key* exactly."""
        org_key = key
        l = []
        try:
            if result_limit == -1:
                result_limit = self.LIMIT
            key = self.clean_string(key)
            returned = self.py_search("*", result_limit)
            for result in returned.docs:
                result_dict = vars(result)
                if org_key in result_dict.values():
                    l.append(result_dict)
        except Exception as e:
            print("TAS_Redisearch Error inside value_search Index:'",
                  self.table_name, "' HOST:'", self.host, "' PORT:'",
                  self.port, "'\n", file=sys.stderr)
            print(e, file=sys.stderr)
        return l

    # {fieldname:[value1, value2], fieldname:[value1, value2]}
    def exact_search(self, input_dict, result_limit=-1):
        formed_str = ""
        l = []
        for field, value_list in input_dict.items():
            formed_str += "@" + field + ":("
            for key in value_list:
                key = self.clean_string(key)
                formed_str += "('" + key + "') | "
            formed_str = formed_str.rstrip(' |')
            formed_str += ") "
        print("PASSED: ", formed_str)
        returned = self.py_search(formed_str, result_limit)
        print("RETURNED:", returned)
        for result in returned.docs:
            result_dict = vars(result)
            for itr, ktr in input_dict.items():
                if result_dict[itr] in ktr:
                    l.append(result_dict)

        return l

    # Search with the passed query
    def custom_search(self, query_string):
        try:
            res = self.redis.execute_command(query_string)
            return Result(res, True)
        except Exception as e:
            print("TAS_Redisearch Error inside custom_search Index:'",
                  self.table_name, "' HOST:'", self.host, "' PORT:'",
                  self.port, "'\n", file=sys.stderr)
            print(e, file=sys.stderr)

    # Search in 'search_in_field' [if any of the element in 'list_to_union' is found then include it in the result
    def union_search(self, list_to_union, search_in_field):
        query_string = "FT.SEARCH " + self.table_name + " "
        union_text = "@" + search_in_field + ":("
        for text in list_to_union:
            union_text += text + "|"

        union_text = union_text.rstrip("|")
        union_text += ")"
        query_string += union_text
        try:
            res = self.redis.execute_command(query_string)
            return Result(res, True)
        except Exception as e:
            print("TAS_Redisearch Error inside union_search Index:'",
                  self.table_name, "' HOST:'", self.host, "' PORT:'",
                  self.port, "'\n", file=sys.stderr)
            print(e, file=sys.stderr)

    # will return all the dictionary for all the categories if no arguments are passed
    def category_taxonomy_dict(self, category='*'):
        cat_taxo_dict = {}
        try:
            total_docs = self.client.info()['num_docs']
            if category == '*':
                query_string = category
            else:
                query_string = "@CATEGORY:" + category
            result = self.py_search(query_string, total_docs)
            for single_result in result.docs:
                try:
                    category = single_result.CATEGORY
                    taxoname = single_result.TAXONAME
                except Exception:
                    continue  # skip docs missing CATEGORY/TAXONAME
                # BUGFIX: this was if/elif, which silently dropped the first
                # taxoname of every newly-seen category.
                if category not in cat_taxo_dict:
                    cat_taxo_dict[category] = []
                if taxoname not in cat_taxo_dict[category]:
                    cat_taxo_dict[category].append(taxoname)
        except Exception as e:
            # BUGFIX: original lacked the print, so the message was a no-op.
            print("TAS_Redisearch Error inside category_taxonomy_dict Index:'",
                  self.table_name, "' HOST:'", self.host, "' PORT:'",
                  self.port, "'\n", file=sys.stderr)
            print(e, file=sys.stderr)
        return cat_taxo_dict

    def total_record(self):
        try:
            return int(self.client.info()['num_docs'])
        except Exception as e:
            # BUGFIX: original lacked the print, so the message was a no-op.
            print("TAS_Redisearch Error inside total_records Index:'",
                  self.table_name, "' HOST:'", self.host, "' PORT:'",
                  self.port, "'\n", file=sys.stderr)
            print(e, file=sys.stderr)

    def get_all_records(self):
        try:
            total = str(self.total_record())
            res = self.redis.execute_command("FT.SEARCH " + self.table_name +
                                             " * LIMIT 0 " + total)
            return Result(res, True)
        except Exception as e:
            # BUGFIX: original lacked the print, so the message was a no-op.
            print("TAS_Redisearch Error inside total_records Index:'",
                  self.table_name, "' HOST:'", self.host, "' PORT:'",
                  self.port, "'\n", file=sys.stderr)
            print(e, file=sys.stderr)

    def clean_string(self, key):
        """Replace RediSearch special characters in *key* with spaces."""
        for ch in ',.<>{}[]"\':;!@#$%^&*()-+=~':
            key = key.replace(ch, ' ')
        return key
コード例 #16
0
class TAS_Import():
    """Imports table-extraction rows into a per-document RediSearch index."""

    def __init__(self, index_name, host="172.16.20.7", port=6382, db=0):
        self.client = Client(index_name, host, port)
        self.host = host
        self.port = port
        self.config_obj = redis_config.TAS_AutoCompleter(
            host, port, db, "Default")

    def add_indexing_schema(self, schema):
        """Create the index.

        NOTE(review): the positional flags (False, False, []) presumably map
        to no_term_offsets/no_field_flags/stopwords -- confirm against the
        installed redisearch-py version.
        """
        self.client.create_index(schema, False, False, [])
        return ["Done"]

    def add_data(self, rdata, index_name):
        """Add one document per row; keys embed position, doc, page and grid."""
        for i, rr in enumerate(rdata):
            l1, l2, l3, l4, l5, l6, l7, l8, l9 = rr
            index = index_name + str(i + 1) + l3 + l4 + l5 + l6
            l1 = self.config_obj.StringEscape(l1)
            l2 = l2.strip()
            self.client.add_document(index,
                                     DATA=l1,
                                     SECTION_TYPE=l2,
                                     DOCID=l3,
                                     PAGE=l4,
                                     GRIDID=l5,
                                     ROWCOL=l6,
                                     BBOX=l7,
                                     PAGE_GRID_SE="%s_%s_%s" % (l4, l5, l2),
                                     Rowspan=l8,
                                     Colspan=l9)
        return ["Done"]

    def drop_index(self):
        """Best-effort drop; failures are reported but not raised."""
        try:
            self.client.drop_index()
        except Exception as e:
            # Python 3 print function (original used a py2 print statement).
            print('Error', e)

    def start(self, data, index_name):
        """Recreate the index *index_name* and load *data* into it."""
        status = 1
        self.drop_index()
        self.client = Client(index_name, self.host, self.port)
        status = 2
        schema = [
            NumericField('INDEX'),
            TextField('DATA'),
            TextField('SECTION_TYPE'),
            TextField('DOCID'),
            TextField('PAGE'),
            TextField('GRIDID'),
            TextField("ROWCOL"),
            TextField('BBOX'),
            TextField("PAGE_GRID_SE"),
            TextField('Rowspan'),
            TextField('Colspan')
        ]
        status = 3
        self.add_indexing_schema(schema)
        status = 4
        self.add_data(data, index_name)
        status = 5
        return [status]