def __init__(self, seeds, logger, thread_num=5, max_depth=9, ranks=None, index=None):
    self.init_seeds_num = len(seeds)
    self.tocrawl = {}
    for seed in seeds:
        self.tocrawl[seed] = 0      # {url: current_depth, ...}
    self.crawled = {}               # {url1: None, url2: None, ...}
    self.max_depth = max_depth      # traversal depth limit
    self.logger = logger
    self.ranks = ranks              # optional precomputed ranks
    self.down_url = get_url.get_url(logger)         # page downloader helper
    self.indexing = indexing.indexing()             # index builder
    if index:                                       # resume from an existing index
        self.indexing.index.update(index)
    self.threadpool = thread_pool.thread_pool(thread_num)
    self.lock = threading.Lock()
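
A minimal, dependency-free sketch of the bookkeeping this constructor sets up: tocrawl maps each URL to the depth at which it was discovered, crawled records finished URLs, and traversal stops once max_depth is reached. The fetch_links callable and the toy link graph below are illustrative stand-ins for the real downloader, not part of the original class.

def bfs_crawl(seeds, fetch_links, max_depth=2):
    tocrawl = {seed: 0 for seed in seeds}   # {url: depth discovered at}
    crawled = {}                            # {url: None} once processed
    while tocrawl:
        url, depth = tocrawl.popitem()
        crawled[url] = None
        if depth >= max_depth:
            continue
        for link in fetch_links(url):       # stand-in for the real downloader
            if link not in crawled and link not in tocrawl:
                tocrawl[link] = depth + 1
    return crawled

graph = {"a": ["b", "c"], "b": ["c"], "c": []}
print(bfs_crawl(["a"], lambda u: graph.get(u, []), max_depth=2))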
Example #2


# cPickle, indexing, webcrawling, textpreprocessing, hreflist, errorlist,
# path and domain are defined or imported at module level earlier in the
# original script (not shown in this snippet).
if __name__ == "__main__":
    url = raw_input("enter the url(http://www.example.com):")
    # url='http://www.vrsiddhartha.ac.in'
    crawl_obj = webcrawling(url)
    print "inside main", crawl_obj.domain
    for everyurl in hreflist:
        print "call hreflist", everyurl
        crawl_obj.crawl(everyurl)
    print "Error list is as follows", errorlist

    # persist the domain info and the error list with cPickle
    info_list = [crawl_obj.domain, len(hreflist)]
    fh = open(path + crawl_obj.domain + "/domaininfo.txt", "w")
    cPickle.dump(info_list, fh)
    fh.close()
    fh = open(path + crawl_obj.domain + "/errorlist.txt", "w")
    cPickle.dump(errorlist, fh)
    fh.close()

    # text preprocessing
    print domain
    fh = open(path + domain + "/domaininfo.txt", "r")
    info_list = cPickle.load(fh)
    fh.close()
    txtpp_obj = textpreprocessing(info_list[1])

    # indexing
    indexing_obj = indexing.indexing(path + info_list[0] + "/")
    indexing_obj.index_start(info_list[1])
    crawl_obj.urlpickle()
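
The dump/load round-trip used above, reduced to a standalone sketch; pickle is the Python 3 replacement for cPickle, and the file name and values here are illustrative only.

import pickle  # cPickle on Python 2

info_list = ["example.com", 42]            # [domain, number of crawled links]
with open("domaininfo.txt", "wb") as fh:   # pickle wants a binary file handle
    pickle.dump(info_list, fh)

with open("domaininfo.txt", "rb") as fh:
    restored = pickle.load(fh)
print(restored)                            # ['example.com', 42]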
Example #3
        cof.write("\n")

    # tail of the conversion routine (its opening is not shown): close the output files
    vof.close()
    oof.close()
    cof.close()
    print("converting finished")


print("call the c++ program, Good Luck~")

total = args.end - args.start
count = total // args.procs  # number of
procs = []
for i in range(args.procs):
    start = args.start + i * count
    end = args.end if i == args.procs - 1 else args.start + i * count + count
    p = Process(target=worker,
                args=(args.ospray_renderer, args.vti_path, view_path, opacity_path, color_path,
                      args.outdir, args.var, args.rounds, str(up_vector[0]), str(up_vector[1]), str(up_vector[2]), str(start), str(end)))
    procs.append(p)
    p.start()


for p in procs:
    p.join()

# all done? meow meow meow
if args.index:
    from indexing import indexing
    indexing(args.outdir, view, opacity, color)
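
The start/end partitioning above, reduced to a self-contained sketch: the range is split into near-equal chunks and the last process absorbs the remainder left by the integer division. The demo_worker here only reports its slice and is not the renderer worker from the original script.

from multiprocessing import Process

def demo_worker(start, end):
    print("handling items", start, "to", end)

def split_range(start, end, procs):
    count = (end - start) // procs
    for i in range(procs):
        lo = start + i * count
        hi = end if i == procs - 1 else lo + count
        yield lo, hi

if __name__ == "__main__":
    workers = [Process(target=demo_worker, args=(lo, hi))
               for lo, hi in split_range(0, 10, 3)]
    for p in workers:
        p.start()
    for p in workers:
        p.join()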
Example #4
import pdfkit

from indexing import indexing
from sampleData import sample, join

# get input from user
url = "https://github.com/" + input("the url for the github repo -> ")
ignores = input("list of ignores, separated by commas -> ").split(",")
output_file_name = input("output file name -> ")


# this function takes all the indexes and joins them together
# "\n" is replaced because of a weird bug where a bunch of "\n" would otherwise end up in the final result
def sanitize_input_for_pdfkit(indexes):
    return join(sample(indexes)).replace("\\n", "")


# get the indexes
indexes = indexing(url, ignores)
# make pdfkit create a pdf
pdfkit.from_string(sanitize_input_for_pdfkit(indexes), output_file_name)
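
A note on the replace call in sanitize_input_for_pdfkit: the pattern "\\n" is the literal two-character sequence backslash + n, so the cleanup strips escaped newline markers left in the text rather than real line breaks. A quick standalone check:

raw = "first part\\nsecond part\nthird part"   # one escaped \n marker, one real newline
cleaned = raw.replace("\\n", "")
print(cleaned)   # the escaped marker is gone, the real newline survives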
Example #5
from distutils.core import setup
from Cython.Build import cythonize

# compile indexing.pyx in place (equivalent to: python setup.py build_ext --inplace)
setup(name="indexing",
      ext_modules=cythonize('indexing.pyx'),
      script_name='setup.py',
      script_args=['build_ext', '--inplace'])

# the freshly built extension module can then be imported and called directly
import indexing
import numpy as np
print(indexing.indexing())
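
For the build above to have something to compile, an indexing.pyx must sit next to setup.py. The real module's contents are not shown in this example, so the stub below is only a hypothetical placeholder that makes the build-then-import round-trip concrete.

# indexing.pyx (hypothetical stub, not the original module)
def indexing():
    # the real project code would build and return its index here
    return "index built"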
Example #6
# os, time and the helper functions download_html, get_url_dic, processing,
# indexing and vs_model_pre are imported from the project's own modules
# earlier in the original script (not shown).
processed_dir = "../processed_data/"
isExists = os.path.exists(processed_dir)
if not isExists:
    os.makedirs(processed_dir)

origin_dir = "../webpages/"
isExists = os.path.exists(origin_dir)
if not isExists:
    os.makedirs(origin_dir)

file_path = "../doc_ku/crawled.txt"

# step 1: download the pages listed in crawled.txt into the webpages directory
download_html(file_path, origin_dir)
print("Download html finished.")

# step 2: process the raw html into the processed directory
url_dic = get_url_dic()
file_num = len(url_dic)
processing(origin_dir, processed_dir)
print("Finish processing html files.")

# step 3: build the index and time it
start = time.time()
indexing(url_dic, processed_dir)
end = time.time()
print("Finish indexing.")
print(end - start)

# step 4: prepare the vector-space model and time it
start = time.time()
vs_model_pre(file_num)
end = time.time()
print(end - start)
print("Finish vs model preparation.")