Ejemplo n.º 1
0
 def add_to_index(self, corpus_dir, corpus_type,
                  stop_file=None, tag_file=None, synch_freq=10000):
     """Index the corpus in @corpus_dir unless it is already recorded.

     The corpus is identified by the last path component of @corpus_dir.
     When that name is missing from ``self.index_contents``,
     ``index.build_index`` is invoked with 'index.txt' and
     'total_word_count.txt' located under ``self.directory``, and the
     (corpus_type, stop_file, tag_file) triple is stored under the name.
     ``self.save_instance()`` is called in every case.
     """
     name = os.path.basename(os.path.normpath(corpus_dir))
     already_indexed = name in self.index_contents
     if not already_indexed:
         index.build_index(
             corpus_dir,
             corpus_type,
             stop_file,
             os.path.join(self.directory, 'index.txt'),
             tag_file,
             os.path.join(self.directory, 'total_word_count.txt'),
             synch_freq,
         )
         self.index_contents[name] = (corpus_type, stop_file, tag_file)
     self.save_instance()
Ejemplo n.º 2
0
 def test_index_database(self):
     """Build the index into the database and compare the stored rows.

     Builds the index from the test configuration, reads every row of the
     ``index`` table ordered by its first two columns, and checks them
     against the expected rows. The cursor and connection are always
     closed, even when the query fails, so the test does not leak a
     database session.
     """
     index_object = index.index('test_data/config_file.yaml')
     index.build_index(index_object, 'test_data/config.ini')
     db_keys = index.load_config_file('test_data/config.ini')
     connection = psycopg2.connect(database=db_keys.get('database'),
                                   user=db_keys.get('user'),
                                   password=db_keys.get('password'))
     # try/finally rather than ``with connection``: in psycopg2 the
     # connection context manager ends the transaction but does not close
     # the connection.
     try:
         cursor = connection.cursor()
         try:
             cursor.execute("SELECT * FROM index order by 1,2;")
             table_content = cursor.fetchall()
         finally:
             cursor.close()
     finally:
         connection.close()
     expected_rows = [
         ('as', 'test_data/full_file_dirty.txt', [1, 2]),
         ('bulgaria', 'test_data/full_file_dirty.txt', [1, 3]),
         ('bulgaria', 'try/try.txt', [22, 84, 99]),
         ('bush', 'test_data/full_file_dirty.txt', [1]),
         ('the', 'test_data/full_file_dirty.txt', [1]),
     ]
     self.assertEqual(expected_rows, table_content)
Ejemplo n.º 3
0
def main() -> None:
    """
    Command-line driver: print the cheapest shop for the requested products.

    The data file path is read from ``sys.argv[1]`` and the product names
    from the remaining arguments. Prints the ``(shop, total)`` tuple with
    the lowest total, or ``None`` when no candidate shop is found.
    """
    source_path = sys.argv[1]
    wanted = sys.argv[2:]

    # Load the data and build both lookup structures.
    item_index, shop_index = index.build_index(data.read_data(source_path))

    # One (shop, total) pair per candidate shop returned by utils.find_shops.
    quotes = [
        (shop, utils.calculate_price(shop_index, wanted, shop))
        for shop in utils.find_shops(item_index, wanted)
    ]

    # min() falls back to None when there are no candidate shops.
    cheapest = min(quotes, key=lambda quote: quote[1], default=None)

    print(cheapest)
Ejemplo n.º 4
0
 def add_to_index(self,
                  corpus_dir,
                  corpus_type,
                  stop_file=None,
                  tag_file=None,
                  synch_freq=10000):
     """Add the corpus found in @corpus_dir to the index if it is new.

     The corpus name is the final path component of @corpus_dir. A corpus
     whose name is already a key of ``self.index_contents`` is not indexed
     again; otherwise ``index.build_index`` is called and the
     (corpus_type, stop_file, tag_file) settings are remembered under that
     name. The instance is saved via ``self.save_instance()`` either way.
     """
     corpus_key = os.path.basename(os.path.normpath(corpus_dir))
     if corpus_key not in self.index_contents:
         base_dir = self.directory
         index_path = os.path.join(base_dir, 'index.txt')
         count_path = os.path.join(base_dir, 'total_word_count.txt')
         index.build_index(corpus_dir, corpus_type, stop_file, index_path,
                           tag_file, count_path, synch_freq)
         self.index_contents[corpus_key] = (corpus_type, stop_file, tag_file)
     self.save_instance()
Ejemplo n.º 5
0
def main():
  """Build the index, then upload its artefacts, logging the duration of each step.

  Arguments come from ``get_args()``. Each phase logs a start message, a
  finish message and the elapsed wall-clock time in seconds.
  """
  args = get_args()

  # Phase 1: build the index into LOCAL_INDEX_FILE.
  build_began = datetime.utcnow()
  logging.info('Index building started...')
  index.build_index(args.embedding_files, LOCAL_INDEX_FILE, args.num_trees)
  build_ended = datetime.utcnow()
  logging.info('Index building  finished.')
  build_seconds = (build_ended - build_began).total_seconds()
  logging.info('Index building  elapsed time: {} seconds'.format(build_seconds))

  # Phase 2: upload the artefacts.
  # NOTE(review): the build writes to LOCAL_INDEX_FILE while the upload is
  # given args.index_file -- confirm these refer to the same artefact.
  upload_began = datetime.utcnow()
  logging.info('Uploading index artefacts started...')
  upload_artefacts(args.index_file)
  upload_ended = datetime.utcnow()
  logging.info('Uploading index artefacts finished.')
  upload_seconds = (upload_ended - upload_began).total_seconds()
  logging.info('Uploading index artefacts elapsed time: {} seconds'.format(upload_seconds))
Ejemplo n.º 6
0
def create_index():
    """Run ``build_index()`` and return the fixed status text 'create index'."""
    build_index()
    status = 'create index'
    return status
Ejemplo n.º 7
0
    parser.add_argument('-b', nargs='*', default=[],
                        help=u'build,建立新的索引;格式:“频道名”'.encode('GBK'))
    parser.add_argument('-u', nargs='?', const='all', default=False,
                        help=u'update,更新索引;无参数则更新全部,格式:“频道名”'.encode('GBK'))

    parser.add_argument('-n', nargs='?', const=False, default=True,
                        help=u'not,若不下载图片,输入此提示符,无其它参数'.encode('GBK'))
    parser.add_argument('-c', nargs='*', default=[],
                        help=u'channel,频道名;格式:“频道名(1) 频道名(2)”'.encode('GBK'))
    parser.add_argument('-t', nargs='*', default=change_time(),
                        help=u'time,时间范围;格式:“年.月.日(1) 年.月.日(2)”'.encode('GBK'))
    parser.add_argument('-p', nargs='*', default={},
                        help=u'path,保存路径;格式:“频道(1) 路径(1) 频道(2) 路径(2)”'.encode('GBK'))
    args = parser.parse_args()

    if args.a:
        info.add(args.a, args.ad)
    if args.s:
        info.show(args.s)
    if args.d:
        info.delete(args.d)

    if args.b:
        index.build_index(args.b[0])
    if args.u:
        index.update_what(args.u)

    if args.n:
        downlaod.download(args.c, args.t, args.p)
Ejemplo n.º 8
0
        ## Adding the Django environment
        print "Adicionando ambiente Django..."
        sys.path.insert(0, '/Users/phillipe/Projects/storyline')
        os.environ['DJANGO_SETTINGS_MODULE'] = 'project.settings'
        from apps.search.models import Article
    except ImportError:
        print "Ocorreu um erro na importação dos módulos necessários\
         para a construção do índice e inclusão dos documentos."

        raise

    # Creating the index
    try:
        print "O índice está sendo construído, ou obtido (se já existir)..."
        idx = index.build_index()
    except:
        print "Não foi possível obter o índice."
        raise

    try:
        print "Lendo os documentos a serem indexados..."
        docs = Article.objects.all()
    except Article.DoesNotExist:
        print "Não foi possível carregar a lista de documentos."
        raise

    try:
        print "Adicionando os documentos ao índice..."
        add_docs(docs, idx)
        print "Total de documentos indexados:", idx.doc_count()
Ejemplo n.º 9
0
import index

# Script entry point: builds the index for the corpus given on the command
# line, writes it to index.txt, prints two summary counts, and then prepares
# the per-query output files.
if __name__ == "__main__":
    print()
    print("Welcome to Hannah and Rey's Search Engine!")
    print()

    # First command-line argument; passed to json_to_dict and build_index.
    corpus = sys.argv[1]

    file_directory = json_to_dict(corpus)
    # print(file_directory["0/1"])

    index_file = 'index.txt'
    # NOTE(review): opened for writing and not closed within this excerpt.
    i = open(index_file, 'w')

    # build_index's result is not captured here; the counts printed below are
    # read from attributes of the index module.
    index.build_index(file_directory, corpus)
    # i.write("hello world")
    index.write_index_to_file(i)
    # print(index.dictionary)
    print("# VISITED DOCUMENTS: " + str(index.visitedDocuments))
    print("UNIQUE WORDS: " + str(len(index.dictionary)))

    # Output file names, one per query (presumably the queries "informatics",
    # "mondego" and "irvine" -- confirm against the code that follows).
    inf_q = 'informatics_q.txt'
    mondego_q = 'mondego_q.txt'
    irvine_q = 'irvine_q.txt'

    # NOTE(review): these handles are opened for writing and not closed
    # within this excerpt.
    inf = open(inf_q, 'w')
    mon = open(mondego_q, 'w')
    irv = open(irvine_q, 'w')

    inf_query = enter_query()
Ejemplo n.º 10
0
     import os, sys, index
     
     ## Adding the Django environment
     print "Adicionando ambiente Django..."
     sys.path.insert(0, '/Users/phillipe/Projects/storyline')
     os.environ['DJANGO_SETTINGS_MODULE'] = 'project.settings'
     from apps.search.models import Article
 except ImportError:
     print "Ocorreu um erro na importação dos módulos necessários\
      para a construção do índice e inclusão dos documentos."
     raise
 
 # Creating the index
 try:
     print "O índice está sendo construído, ou obtido (se já existir)..."
     idx = index.build_index()
 except:
     print "Não foi possível obter o índice."
     raise
 
 
 try:
     print "Lendo os documentos a serem indexados..."
     docs = Article.objects.all()
 except Article.DoesNotExist:
     print "Não foi possível carregar a lista de documentos."
     raise
 
 try:
     print "Adicionando os documentos ao índice..."
     add_docs(docs, idx)
Ejemplo n.º 11
0
# Flask application object. Static assets are read from frontend/dist; the
# empty static_url_path means they are exposed without a URL prefix.
app = Flask(
    __name__,
    static_url_path='',
    static_folder='frontend/dist'
)
# Sets the 'ENV' config key to 'development'.
app.config['ENV'] = 'development'

INDEX_URL = "http://*****:*****@app.route('/search', methods=['POST'])
def search():
    content = request.get_json(silent = True)
    query = content['query']
    start = time.time()
    docs = process_query(query, index, 100)
    
    result = {}
    result['documents'] = []
    for score, docId in docs:
        line = linecache.getline(RAW_TSV_FILE, docId + 1).strip().split('\t')
        curr_doc = {}
Ejemplo n.º 12
0
# Locate the directory of this script so the bundled modules under
# resources/lib can be imported.
try:
    cur_dir = os.path.dirname(os.path.abspath(__file__))
except Exception:
    # Best-effort fallback (e.g. when __file__ is undefined). Catching
    # Exception instead of using a bare except lets KeyboardInterrupt and
    # SystemExit propagate.
    cur_dir = os.getcwd()

# These imports must come after the path is extended.
sys.path.append(os.path.join(cur_dir, 'resources', 'lib'))
import index
import play

# sys.argv[2] carries the query string; [1:] drops its first character
# (presumably the leading '?' -- confirm against the caller).
args = urlparse.parse_qs(sys.argv[2][1:])

# parse_qs maps each key to a list of values; absent keys become None here.
play_arg = args.get('play')
sid_arg = args.get('sid')
eid_arg = args.get('eid')
category_arg = args.get('category')
vtid_arg = args.get('vtid')
nid_arg = args.get('nid')

# Dispatch on which parameters are present. Branch order matters: the most
# specific combination is tested first, and 'category' wins over 'eid' when
# both accompany 'sid'.
if play_arg is not None:
    # NOTE(review): nid_arg is indexed without a None check, so a 'play'
    # request without 'nid' raises TypeError -- confirm 'nid' is always sent.
    play.play(url=play_arg[0], nid=nid_arg[0])
elif sid_arg is not None and category_arg is not None and vtid_arg is not None:
    index.build_index(sid=sid_arg[0], category=category_arg[0], vtid=vtid_arg[0])
elif sid_arg is not None and category_arg is not None:
    index.build_index(sid=sid_arg[0], category=category_arg[0])
elif sid_arg is not None and eid_arg is not None:
    index.build_index(sid=sid_arg[0], eid=eid_arg[0])
elif sid_arg is not None:
    index.build_index(sid=sid_arg[0])
else:
    index.build_index()

Ejemplo n.º 13
0
def create_index():
    """Invoke ``build_index()`` and return the literal 'create index'."""
    build_index()
    message = 'create index'
    return message