Example #1
0
def main():
    '''Generate index files.

    Reads CLI options via docopt, builds an Index for the property,
    and writes its JSON manifest to PROJECT_DIR/manifest.json.
    '''
    options = docopt(__doc__)

    base_url = options['--url']  # TODO: validate that this is a safe URL
    property_name = options['<propertyName>']
    root_dir = options['<root>']
    include_in_global_search = options['--includeInGlobalSearch']

    intro_message = '''
    Indexing {property_name}
    Manifest will {is_global}be included in global property searches.

        url: {base_url}
       root: {root_dir}
    '''.format(property_name=property_name,
               base_url=base_url,
               root_dir=root_dir,
               is_global=('' if include_in_global_search else 'NOT '))
    print(intro_message)

    # Build the index and serialize its manifest.
    index = Index(base_url, property_name, root_dir, include_in_global_search)
    index.build()
    manifest = json.dumps(index.manifest)

    print('Exporting index...')
    # 'w' truncates on open — replaces the original append+seek(0)+truncate
    # dance with the same end result.
    with open(PROJECT_DIR + 'manifest.json', 'w') as file:
        file.write(manifest)
    print('Index written to manifest.json!\n')
    def __init__(self):
        """Crawl the configured links, compute PageRanks, build the index,
        and print scoring results for a set of sample queries."""
        crawler = Crawler(self.LINKS)
        visited_urls = crawler.getVisited()
        link_structure = crawler.getLinkStructure()
        print("Link-Struktur:\n")
        crawler.printLinkStructure()

        ranker = PageRank(link_structure)
        ranks = ranker.getPageRank()
        print("\n\nPageRanks:\n")
        ranker.printPageRank()

        indexer = Index(self.STOPWORDS, visited_urls)
        built_index = indexer.getIndex()
        print("\n\nIndex:\n")
        indexer.printIndex()

        scorer = Scorer(ranks, built_index, link_structure)
        # scorer.usePageRank(True)
        print("\n\nDokumentenlängen:\n")
        scorer.printDocumentLengths()
        print("\n\nSuchergebnisse:\n")
        for query in (["tokens"], ["index"], ["classification"],
                      ["tokens", "classification"]):
            scorer.calculateScores(query)
Example #3
0
def extract_text(docno, index_path, collection_type='html'):
    '''Return the text of document *docno* from the index at *index_path*.

    collection_type: 'html' (render via the external extract script) or
    'text' (return the stored content verbatim). Returns '' on any error.
    '''
    text = ''
    try:
        index = Index(index_path)
        content = index.get_doc_content(docno)
        if collection_type == 'html':
            html_path, text_path, title_path = map(
                lambda suffix: '%s.%s' % (docno, suffix), suffixes)
            # Dump the raw HTML so the helper script can process it;
            # 'with' guarantees the handle is closed (original leaked text_f).
            with open(html_path, 'w') as f:
                f.write(content)
            subprocess.call(['python', extract_script, html_path, text_path, title_path])
            # The extract script writes the plain text to text_path.
            with open(text_path) as text_f:
                text += ''.join(text_f.readlines())
            os.remove(html_path)
            os.remove(text_path)
            os.remove(title_path)
        elif collection_type == 'text':
            text = content
    except Exception as e:  # py2/py3-compatible syntax; best-effort fallthrough
        sys.stderr.write('error at docno %s\n' % docno)
    # BUG FIX: the original computed *text* but never returned it.
    return text
Example #4
0
class PositionalIndexer:
    """Builds, persists, loads, and queries a positional inverted index."""

    def __init__(self):
        # Backing stores: the index itself, token normalization, doc-id mapping.
        self.index_db = Index()
        self.preprocessor = PreProcessor()
        self.id_manager = IdManager()

    def index(self, files_parent_directory):
        """Index every token of every file under *files_parent_directory*."""
        for word, file, position in get_word_by_word(files_parent_directory):
            tokens = self.preprocessor.pre_process(word)
            doc_id = self.id_manager.get_document_id(
                combine_path(files_parent_directory, file))
            for token in tokens:
                self.index_db.index(token, doc_id, position)

    def save(self, compress: bool):
        """Serialize the id map and the index under the Index/ directory."""
        Serialization.write_ids(self.id_manager, 'Index/ids.mir')
        Serialization.write_to_file(self.index_db, 'Index/index.mir', compress)

    def load(self, compress: bool):
        """Restore the id map and the index from the Index/ directory."""
        self.id_manager = Serialization.load_ids('Index/ids.mir')
        self.index_db = Serialization.read_from_file('Index/index.mir', compress)

    def search(self, query: str):
        """Look up each pre-processed term of *query* in the index."""
        for token in query.split(' '):
            for term in self.preprocessor.pre_process(token):
                posting_list = self.index_db.find(term)
                term_frequency = len(posting_list)
Example #5
0
    def main(self):
        """Print stock items whose PER is below their business-category average."""
        index = Index()
        stock = Stock()
        sise = Sise()

        category_averages = self.read_average_per_file()
        items = stock.get_all_stock_item_list()

        print("종목명/PER/업종PER/상승률/상승하락/2019시초가/현재가/업종코드/업종설명")

        for item in items:
            code = item[0]
            per_pair = index.get_stock_item_per(code)
            item_per = per_pair[0].replace(",", "")
            category_per = per_pair[1].replace(",", "")

            # Skip items without published PER figures.
            if item_per == '-' or category_per == '-':
                continue

            # Keep only items trading below their category average PER.
            if float(item_per) > float(category_per):
                continue

            rate = sise.get_increase_rate_by_code(code, None)
            if rate is None:
                continue

            print("/".join([item[1], item_per, category_per,
                            str(rate[0]), rate[1], str(rate[2]),
                            str(rate[3]), item[2],
                            category_averages[item[2]][1]]))
Example #6
0
 def __init__(self, db, cdb):
     """Populate the in-memory cache index from the cache DB and keep *db*."""
     cache_index = Index()
     cache_index.createFromCursor(cdb.findAll())
     self.cacheIndex = cache_index
     self.db = db
     # Hard-coded corpus size used by ranking math elsewhere.
     self.numDocs = 127000
Example #7
0
    def selector(self, location):
        """Match the grammar of a selector.

        A selector is a (possibly empty) sequence of array-index
        ([expr, ...]) and record-field (.name) accesses applied to
        *location*. Returns the AST node for the full access chain, or
        None when no pattern matched.
        """
        ret = None

        # 0 to many patterns.
        while True:
            if self.current.kind == "[":
                self.match("[")

                self.stack.append("ExpressionList")
                expressions = self.expression_list()

                self.match("]")

                for expression in expressions:
                    if not isinstance(location, Location) or not isinstance(location.type, Array):
                        # Not an array variable.
                        try:
                            raise InvalidArray(location.type, self.last)
                        except InvalidArray as e:
                            self.invalid_token(e)

                    # Each expression indexes one dimension; the new Index
                    # node becomes the location for the next dimension.
                    cur = Index(location, expression)
                    cur.token = self.last
                    ret = cur
                    location = cur

            elif self.current.kind == ".":
                self.match(".")
                name = self.match("identifier")
                if name == None:
                    # Malformed field access; resume with the next token.
                    continue

                if self.record_scope == None:
                    # Not a record variable.
                    try:
                        raise InvalidRecord(location.type, self.last)
                    except InvalidRecord as e:
                        self.invalid_token(e)
                        break

                # Access the record's scope and find the variable with name.
                self.current_scope = self.record_scope
                entry = self.find(name.value)
                var = VariableAST(name.value, entry)

                # Descend so a subsequent ".field" resolves in the nested scope.
                self.set_record_scope(entry)

                cur = FieldAST(location, var)
                ret = cur
                location = cur
            else:
                # Pattern ended.
                break

        # Restore the lookup scope before returning to the caller.
        self.current_scope = self.program_scope
        return ret
Example #8
0
 def read_from_file(filename: str, compress: bool):
     """Deserialize an Index from *filename*.

     File layout per word: 4-byte big-endian word length, UTF-8 word
     bytes, 4-byte posting-list length, then the posting entries. Each
     entry is either variable-byte encoded (when *compress* is True) or
     a fixed pair of 4-byte big-endian document-id and position.
     Raises ValueError for a missing directory or a truncated file.
     """
     result = Index()
     if not os.path.exists(os.path.dirname(filename)):
         # BUG FIX: corrected "path fo loading" typo in the error message.
         raise ValueError('The path for loading the index does not exist.')
     with open(filename, 'rb') as inp:
         while True:
             byte = inp.read(4)
             if byte == b'':
                 # Clean end of file.
                 break
             word_length = int.from_bytes(byte, 'big')
             if word_length == 0:
                 # A zero-length word marks the end of the data.
                 break
             byte = inp.read(word_length)
             if byte == b'':
                 raise ValueError('Malformed index file.')
             word = byte.decode('UTF-8')
             byte = inp.read(4)
             if byte == b'':
                 raise ValueError('Malformed index file.')
             posting_list_length = int.from_bytes(byte, 'big')
             for i in range(posting_list_length):
                 document_id: int
                 position: int
                 if compress:
                     # Variable-byte decoding: a byte >= 0x80 terminates
                     # the encoded integer; earlier bytes are continuation.
                     document_id_array = []
                     while True:
                         byte = inp.read(1)
                         if byte == b'':
                             raise ValueError('Malformed index file.')
                         document_id_array.append(byte)
                         if byte >= b'\x80':
                             break
                     document_id = VariableByteUtility.decode(
                         b''.join(document_id_array))
                     position_array = []
                     while True:
                         byte = inp.read(1)
                         if byte == b'':
                             raise ValueError('Malformed index file.')
                         position_array.append(byte)
                         if byte >= b'\x80':
                             break
                     position = VariableByteUtility.decode(
                         b''.join(position_array))
                 else:
                     byte = inp.read(4)
                     if byte == b'':
                         raise ValueError('Malformed index file.')
                     document_id = int.from_bytes(byte, 'big')
                     byte = inp.read(4)
                     if byte == b'':
                         raise ValueError('Malformed index file.')
                     position = int.from_bytes(byte, 'big')
                 result.index(word, document_id, position)
         return result
Example #9
0
    def __init__(self, filename, stopfile):
        """Load the schema from *filename* and build an index over its types.

        :param filename: path to a commented-JSON schema file
        :param stopfile: stopword file handed to the Index
        """
        # BUG FIX: use a context manager so the schema file handle is closed
        # (the original open(...).read() leaked the handle).
        with open(filename, 'r') as schema_file:
            self.schema = commentjson.loads(schema_file.read())

        # Just convenience aliases into the schema.
        self.ents = self.schema['ents']
        self.types = self.schema['types']
        self.ind = Index(self.types, stopfile)
        self.indexFile = 'index'
        if 'links' in self.schema:
            self.defaults = self.schema['defaults']
            self.links = self.schema['links']
Example #10
0
 def get_index(self, id):
     """Sync index *id* with the server; pull a fresh copy when hashes differ."""
     index = Index(id)
     buffer = index.get_xml().encode('utf-8')
     hash = md5(buffer).digest()
     # 0x03 = "here is my index hash" request.
     self.ssend('\x03' + hash)
     flag = self.srecvall(1)
     if flag == "\x04":
         # Server has a different version: receive and store it.
         buffer = self.recv_file()
         index.set_xml(buffer)
     elif flag == "\x05":
         # Hashes match; nothing to do.
         pass
     else:
         # BUG FIX: call syntax instead of the Python-2-only
         # `raise Exception, "..."` statement form.
         raise Exception("Protocol Error")
Example #11
0
    def index_scripts(self, script_paths: list, index: Index) -> None:
        """Record each script of *script_paths* in *index*, resolving
        relative paths that fail direct lookup by scanning known paths."""
        for relative_path in script_paths:
            try:
                source_path = self._get_absolute_script_path(relative_path)
            except FileExistsError:
                # Fall back to matching the relative path against every
                # known script path; re-raise when nothing usable matches.
                matches = [p for p in self.get_script_paths(True)
                           if p.endswith(relative_path)]
                source_path = ''.join(matches)
                if not source_path or not os.path.exists(source_path):
                    raise

            index.write_file(source_path)
Example #12
0
File: Client.py Project: sok63/vpyn
 def get_index(self, id):
     """Sync index *id* with the server; fetch a fresh copy when hashes differ."""
     index = Index(id)
     buffer = index.get_xml().encode('utf-8')
     hash = md5(buffer).digest()
     # 0x03 = "here is my index hash" request.
     self.ssend('\x03'+hash)
     flag = self.srecvall(1)
     if flag == "\x04":
         # Server has a different version: receive and store it.
         buffer = self.recv_file()
         index.set_xml(buffer)
     elif flag == "\x05":
         # Hashes match; nothing to do.
         pass
     else:
         # BUG FIX: call syntax instead of the Python-2-only
         # `raise Exception, "..."` statement form.
         raise Exception("Protocol Error")
Example #13
0
def test_weighter():
    """Smoke-test every weighter over the CACM index: build the index once,
    then print each weighter's doc/stem/query weights for fixed probes."""
    parser = ParserCACM()
    textRepresenter = PorterStemmer()
    fname = "data/cacm/cacm.txt"
    I = Index(parser, textRepresenter)
    I.indexation(fname)
    # One instance of each weighting scheme, all sharing the same index.
    weighters = [Binary(I), TF(I), TF_IDF(I), Log(I), Log_plus(I)]
    for i, w in enumerate(weighters):
        print "Test of weighter" + str(i)
        print "getDocWeightsForDoc"
        print w.getDocWeightsForDoc("20")
        print "getDocWeightsForStem"
        print w.getDocWeightsForStem("accelerat")
        print "getDocWeightsForQuery"
        print w.getWeightsForQuery(I.getTfsForDoc("20"))
Example #14
0
    def __init__(self, settings, screen, button):
        """Set up the screen, sprite images, and font for this UI element."""
        self.screen = screen
        self.screen_rect = screen.get_rect()
        # NOTE(review): this stores the Settings *class*, not the `settings`
        # instance passed in — looks like a bug; confirm before changing.
        self.theSettings = Settings

        # First frame of each of the three animation sets.
        self.image = Index().a1[0]
        self.image2 = Index().a2[0]
        self.image3 = Index().a3[0]
        self.rect = self.image.get_rect()

        self.text_color = (0, 255, 0)
        self.font = pygame.font.SysFont(None, 48)
        # Re-create the display surface and clear it to the background color.
        screen = pygame.display.set_mode(
            (settings.screen_width, settings.screen_height))
        screen.fill(settings.bg_color)
Example #15
0
    def calculateScores(self, query):
        """Score indexed sites against *query* using tf-idf weights,
        length-normalize, optionally apply PageRank, and print the
        results in descending score order."""
        scores = {}

        # BUG FIX: the original loop only rebound the loop variable, so the
        # normalization was silently discarded. Normalize a copy instead.
        query = [Index.normalize(term) for term in query]

        queryLength = self.__calculateQueryLength(query)
        for term in query:
            if term in self.__index:
                postingList = self.__index[term]
                df = len(postingList) + 0.0
                wtq = math.log10(self.__N / df)  # idf weight of the term

                for site in postingList:
                    if site not in scores:
                        scores[site] = 0
                    tf = postingList[site]
                    wtd = (1 + math.log10(tf)) * wtq
                    # NOTE(review): wtd already contains wtq, so this squares
                    # the idf factor; kept as-is to preserve ranking behavior.
                    scores[site] += wtd * wtq
        # Cosine normalization by document length and query length.
        for site in scores:
            scores[site] /= self.__documentLengths[site] * queryLength

        if self.__pageRankIsOn:
            for site in scores:
                scores[site] *= self.__pageRanks[site]

        # Sort ascending then reverse, preserving the original tie order.
        scores = (sorted(scores.items(), key = operator.itemgetter(1)))
        scores.reverse()
        scores = collections.OrderedDict(scores)

        print(query)
        for site in scores:
            print(site + ':    ' + str(format(scores[site], '.6F')))
Example #16
0
 def reset(self):
     """Wipe the database and rebuild it: index objects, the admin player,
     and the universe; then checkpoint all managers."""
     # cleanup database
     self.db.clear()
     self.msgMngr.clear()
     # create index objects under their well-known OIDs
     self.db.create(Index(), OID_I_LOGIN2OID)
     self.db.create(Index(), OID_I_NAME2OID)
     # create admin
     self.registerPlayer('admin', self.createAdmin(), OID_ADMIN)
     # create universe
     self.db.create(self.cmdPool[T_UNIVERSE].new(T_UNIVERSE), OID_UNIVERSE)
     self.createUniverse()
     # save all informations so the rebuilt state is durable
     self.db.checkpoint()
     self.msgMngr.checkpoint()
     self.clientMngr.checkpoint()
    def __init__(self):
        """Build the tools notebook with Index, Alignment and Viewer tabs."""
        super(Tools, self).__init__()

        def boxed(widget):
            # Center *widget* via an Alignment inside a fresh VBox.
            box = gtk.VBox(False, 5)
            align = gtk.Alignment(0.5, 0.25, 0, 0)
            align.add(widget)
            box.pack_start(align)
            return box

        vbox_index = boxed(Index())
        vbox_align = boxed(Alignment())
        vbox_viewer = boxed(Viewer())

        self.set_tab_pos(gtk.POS_TOP)
        # Append the three tabs with their localized labels.
        for box, loc_key in ((vbox_index, "tools_index"),
                             (vbox_align, "tools_align"),
                             (vbox_viewer, "tools_viewer")):
            self.append_page(box)
            self.set_tab_label_text(box, config.LOC[loc_key])
Example #18
0
def initializePortfolio():
    """Populate the global portfolio with assets and indices read from the
    config XML document."""
    write("Initializing Portfolio... ")
    flush()

    # Holdings: each <item> carries a ticker, average price and share count.
    for item in config.getElementsByTagName('item'):
        symbol = item.getElementsByTagName('ticker')[0].firstChild.data
        average_price = float(
            item.getElementsByTagName('averagePrice')[0].firstChild.data)
        share_count = int(
            item.getElementsByTagName('shareCount')[0].firstChild.data)
        name = item.getAttribute('name')
        portfolio.addAsset(Asset(symbol, name, average_price, share_count))

    # Benchmarks: each <index> only needs a ticker and a display name.
    for index in config.getElementsByTagName('index'):
        symbol = index.getElementsByTagName('ticker')[0].firstChild.data
        name = index.getAttribute('name')
        portfolio.addIndex(Index(symbol, name))

    write("done")
    flush()
Example #19
0
    def __init__(self, ai_settings, screen):
        """Create a ship, load its animation frames, and park it bottom-center."""
        super(Ship, self).__init__()
        self.screen = screen
        self.ai_settings = ai_settings

        # Animation frames: s1 = normal flight, E1 = explosion sequence.
        sprite_sheets = Index()
        self.images = sprite_sheets.s1
        self.explode = sprite_sheets.E1

        self.index = 0
        self.image = self.images[self.index]
        self.rect = self.image.get_rect()
        self.screen_rect = screen.get_rect()
        self.timer = pygame.time.get_ticks()
        # Start each new ship at the bottom center of the screen.
        self.rect.centerx = self.screen_rect.centerx
        self.rect.bottom = self.screen_rect.bottom

        # Bookkeeping for the death animation.
        self.death_index = None
        self.last_frame = None
        self.dieing = False
        self.dead = False

        # Store a decimal value for the ship's center.
        self.center = float(self.rect.centerx)

        # Movement flags.
        self.moving_right = False
        self.moving_left = False
 def __init__(self, db, cdb):
     """Build the in-memory cache index from *cdb* and keep a handle on *db*."""
     self.cacheIndex = Index()
     # Seed the cache index with every record in the cache database.
     self.cacheIndex.createFromCursor(cdb.findAll())
     self.db = db
     self.numDocs = 127000  # fixed corpus size assumed by the ranking code
def index():
    """Exercise the Index analytics pipeline on 12-hour bitstamp btcusd candles."""
    idx = Index("bitstamp", "btcusd", "12-h")
    candles, closes = idx.create_matrix()
    open_arr, close_arr = idx.create_np_array(candles, closes)

    # Sanity checks: the arrays must mirror the source matrices.
    print("Check openPrice_ar and candle_matrix: ",
          open_arr == candles[:, 2])  #EXPECTED true
    print("Check closePrice_ar and closePrice_matrix: ",
          close_arr == closes)  #EXPECTED true
    print("openPrice_ar shape: ", open_arr.shape)
    print("closePrice_ar: ", close_arr.shape)

    # Descriptive statistics: means, quartiles and the correlation matrix.
    (op_mean, cp_mean, op_q0, op_q1, op_q2, op_q3, op_q4,
     cp_q0, cp_q1, cp_q2, cp_q3, cp_q4, corr_matrix) = idx.index(
        open_arr, close_arr)

    idx.print_dotplot(open_arr, close_arr, op_mean, cp_mean, op_q0,
                      op_q1, op_q2, op_q3, op_q4, cp_q0, cp_q1, cp_q2, cp_q3,
                      cp_q4)
    idx.scatterplot(open_arr, close_arr)
    idx.correlationMatrix(corr_matrix)
Example #22
0
def initIndex(database_file):
    """Init Index or load it if previously computed.

    The index is cached in 'Index.p'; a fresh build parses *database_file*
    with ParserCLEF08 + PorterStemmer. Returns the Index instance.
    """
    sys.stdout.write("Indexing database...")
    sys.stdout.flush()
    if os.path.isfile('Index.p'):
        # BUG FIX: context managers ensure the pickle file handles are closed
        # (the original passed bare open(...) calls and leaked them).
        with open("Index.p", "rb") as f:
            I = pickle.load(f)
    else:
        parser = ParserCLEF08()
        textRepresenter = PorterStemmer()
        I = Index(parser, textRepresenter)
        I.indexation(database_file)
        # Drop the parser before pickling (not needed after indexation).
        I.parser = None
        with open("Index.p", "wb") as f:
            pickle.dump(I, f)

    sys.stdout.write("Done!\n")
    sys.stdout.flush()

    return I
Example #23
0
def main():
    """Determine arguments and pass arguments to compiler"""

    # BUG FIX: build a real ProjectOptions instance instead of assigning
    # attributes onto the namedtuple *class* itself — the original only
    # worked by accident and shared mutable state across calls.
    ProjectOptions = namedtuple(
        'ProjectOptions',
        'game_type input_path disable_anonymizer disable_bsarch disable_indexer'
    )
    _options = ProjectOptions(
        game_type=GameType.from_str(_args.game),
        input_path=_args.input,
        disable_anonymizer=_args.disable_anonymizer,
        disable_bsarch=_args.disable_bsarch,
        disable_indexer=_args.disable_indexer,
    )

    _project = Project(_options)

    time_elapsed = TimeElapsed()

    ppj = PapyrusProject(_project)

    # the index is used to exclude unchanged scripts from compilation
    absolute_script_paths = ppj.get_script_paths(absolute_paths=True)
    file_name, file_extension = os.path.splitext(
        os.path.basename(ppj.input_path))
    project_index = Index(file_name, absolute_script_paths)

    ppj.compile_custom(project_index, time_elapsed)

    no_scripts_modified = False
    missing_scripts_found = False

    if _options.disable_indexer:
        pex_paths = ppj.get_script_paths_compiled()
    else:
        pex_paths, validation_states = ppj.validate_project(
            project_index, time_elapsed)
        # Exactly one state and it is FILE_NOT_MODIFIED -> nothing changed.
        no_scripts_modified = len(
            validation_states
        ) == 1 and ValidationState.FILE_NOT_MODIFIED in validation_states
        missing_scripts_found = ValidationState.FILE_NOT_EXIST in validation_states

    if _options.disable_anonymizer:
        log.warn('Anonymization disabled by user.')
    elif no_scripts_modified:
        log.error(
            'Cannot anonymize compiled scripts because no source scripts were modified'
        )
    else:
        ppj.anonymize_scripts(pex_paths, ppj.output_path)

    if missing_scripts_found:
        log.error('Cannot pack archive because there are missing scripts')
    else:
        ppj.pack_archive()

    time_elapsed.print()
Example #24
0
def main(args):
    """CLI entry point: optionally (re)build the index, then look up a word."""
    global korpus

    help_flags = ["-h", "--help"]
    build_flags = ["-b", "--build"]
    n_flag = ["-n"]

    def flag_set(flags):
        return any(flag in args for flag in flags)

    if len(args) == 1 or flag_set(help_flags):
        print_usage()
        return

    # Rebuild when asked to, or when no index file exists yet.
    building = flag_set(build_flags) or not os.path.isfile(INDEX_FILENAME)

    n = None
    word = args[-1]
    if flag_set(n_flag):
        param = args[args.index("-n") + 1]
        if param.isdigit() and int(param) >= 0:
            n = int(param)
        else:
            print("Fel parameter till -n: ", param)
            return

    with Korpus(KORPUS_PATH) as korpus:
        index = Index()
        if building:
            print("Bygger index.")
            index.build(korpus)
            print("Index färdigbyggt.")
            if word in build_flags:
                return
        try:
            indices = index[word]
        except Exception as e:
            print("\nOjdå, det här gick inge bra: ", e, "\n")
            print_usage()
        else:
            offset = 30 + len(word)
            print_results(indices, n, offset)
    def __init__(self, server, lst):
        """
        Create a new Database instance from the server instance (useful to get
        the server name from the database) and the list of the content of the
        tbschema formatted file.
        :param server: The server instance
        :param lst: The list of the tbschema content
        """

        # Server instance
        self.server = server

        # Database's name (taken from each 'database ...' statement; the
        # trailing character of the token is stripped)
        for i in get_all_keyword_position_in_list('database ', lst):
            self.database_name = lst[i].split()[1][:-1]

        # Dictionary of tables, keyed by table name
        self.tables_dictionnary = {}
        for i in get_all_keyword_position_in_list('create table ', lst):
            table_name = get_table_name(lst[i])
            self.tables_dictionnary[table_name] = Table(self, lst[i][:-1])

        # Dictionary of indexes, keyed by index name (plain and unique)
        self.indexes_dictionnary = {}
        for i in get_all_keyword_position_in_list('create index ', lst):
            index_name = get_index_name(lst[i], False)
            self.indexes_dictionnary[index_name] = Index(self, lst[i][:-1])

        for i in get_all_keyword_position_in_list('create unique index ', lst):
            index_name = get_index_name(lst[i], True)
            self.indexes_dictionnary[index_name] = Index(self, lst[i][:-1])

        # Dictionary of grants, keyed by grant key
        self.grants_dictionnary = {}
        for i in get_all_keyword_position_in_list('grant ', lst):
            grant_key = get_grant_key(lst[i])
            self.grants_dictionnary[grant_key] = Grant(self, lst[i][:-1])

        # Dictionary of revokes, keyed by revoke key
        self.revokes_dictionnary = {}
        for i in get_all_keyword_position_in_list('revoke ', lst):
            revoke_key = get_revoke_key(lst[i])
            self.revokes_dictionnary[revoke_key] = Revoke(self, lst[i][:-1])
Example #26
0
def main(args):
    """Command-line driver: build the index if needed, then search for a word."""
    global korpus

    help_flags = ("-h", "--help")
    build_flags = ("-b", "--build")

    def has_any(flags):
        return any(f in args for f in flags)

    if len(args) == 1 or has_any(help_flags):
        print_usage()
        return

    # Rebuild when explicitly requested or when no index file exists yet.
    building = has_any(build_flags) or not os.path.isfile(INDEX_FILENAME)

    n = None
    word = args[-1]
    if has_any(("-n",)):
        param = args[args.index("-n") + 1]
        if not param.isdigit() or int(param) < 0:
            print("Fel parameter till -n: ", param)
            return
        n = int(param)

    with Korpus(KORPUS_PATH) as korpus:
        index = Index()
        if building:
            print("Bygger index.")
            index.build(korpus)
            print("Index färdigbyggt.")
            if word in build_flags:
                return
        try:
            indices = index[word]
        except Exception as e:
            print("\nOjdå, det här gick inge bra: ", e, "\n")
            print_usage()
        else:
            print_results(indices, n, 30 + len(word))
	def doc_to_vector_all(self,filename,feature_filename,all_feature):
		"""Write one tf-idf vector per document to *filename*, the sorted
		feature vocabulary to *feature_filename*, and record an inverted
		index of the non-zero positions."""
		docsvector = self.count_tfidf_all(all_feature)

		# Persist the vocabulary, one feature per line, in sorted order.
		with open(feature_filename,'w') as featurefile:
			for word in sorted(self.feature):
				featurefile.write('%s\n' % word)

		my_index = Index(self.name)
		with open(filename,'w') as traintext:
			for doc in self.docs:
				traintext.write('%d:' % doc)
				# Emit only non-zero tf-idf entries as <position,weight> pairs.
				for feature_pos, tfidf in enumerate(docsvector[doc]):
					if tfidf:
						my_index.create_Index(feature_pos,doc)
						traintext.write('\t<%d,%f>' % (feature_pos,tfidf))
				traintext.write('\n')
		my_index.record_Index()
Example #28
0
 def load_indexes(self):
     'adds indexes specified in the Config/study.cfg file'
     # This is a list of objects, NOT directories. A list of directories is in self.MainConfig
     self.Indexes = {}
     self.IndexList = []
     # One Index object per configured name; data paths are resolved relative
     # to the study root, configs live under Config/Indexes/<name>.cfg.
     for index_name, index_path in self.config.indexes.iteritems():
         index_path = os.path.join(self.path, index_path)
         config_path = os.path.join(self.path, 'Config', 'Indexes',
                                    index_name + '.cfg')
         self.Indexes[index_name] = Index(self, index_name, index_path,
                                          config_path)
         self.IndexList.append(self.Indexes[index_name])
Example #29
0
 def build_ii(self):
     """Build the inverted index (process blocks, then merge) and report timing."""
     started = time.time()
     builder = Index(self._config, callback=self.print_callback)
     builder.process_blocks()
     builder.merge_blocks()
     finished = time.time()
     print("La construcción del índice demoró %s segundos" % (finished - started))
Example #30
0
def extract_text(docno, index_path):
    """Return title + body text of document *docno*, rendered from its HTML.

    Writes temporary <docno>.html/.text/.title files, runs the external
    extract script, then reads the results back. Returns '' on error.
    """
    text = ''
    try:
        index = Index(index_path)
        content = index.get_doc_content(docno)
        html_path, text_path, title_path = map(
            lambda suffix: '%s.%s' % (docno, suffix), suffixes)
        # 'with' guarantees the handles are closed (original leaked the readers).
        with open(html_path, 'w') as f:
            f.write(content)

        subprocess.call(['python', extract_script, html_path, text_path, title_path])

        # first line is the title
        with open(title_path) as title_f:
            text = ' '.join(map(str.strip, title_f.readlines())) + '\n'
        with open(text_path) as text_f:
            text += ''.join(text_f.readlines())

        # Temp files intentionally kept (removal was disabled in the original):
        #os.remove(html_path);
        #os.remove(text_path);
        #os.remove(title_path);
    except Exception as e:  # py2/py3-compatible syntax; best-effort fallthrough
        sys.stderr.write('error at docno %s\n' % docno)
    # BUG FIX: the original computed *text* but never returned it.
    return text
Example #31
0
 def reset(self):
     """Wipe the database and rebuild it: index, admin player, universe;
     then checkpoint all managers."""
     # cleanup database
     self.db.clear()
     self.msgMngr.clear()
     # create indexes
     self.db.create(Index(), Const.OID_I_LOGIN2OID)
     # create admin
     self.registerPlayer(Const.ADMIN_LOGIN, self.createAdmin(), Const.OID_ADMIN)
     # create universe
     self.createUniverse()
     # save all informations so the rebuilt state is durable
     self.db.checkpoint()
     self.msgMngr.checkpoint()
     self.clientMngr.checkpoint()
Example #32
0
def main():

	crawler = Crawler([
		"http://people.f4.htw-berlin.de/fileadmin/user_upload/Dozenten/WI-Dozenten/Classen/DAWeb/smdocs/d01.html",
		"http://people.f4.htw-berlin.de/fileadmin/user_upload/Dozenten/WI-Dozenten/Classen/DAWeb/smdocs/d06.html",
		"http://people.f4.htw-berlin.de/fileadmin/user_upload/Dozenten/WI-Dozenten/Classen/DAWeb/smdocs/d08.html"
	])
	crawler.crawl()

	bank = crawler.get_bank()
	bank.sortBank()

	print '\nLinkstruktur: \n' 
	bank.printOutgoing()

	print '\nPageRanks:'
	rank = PageRank(bank, 0.95, 0.04)
	rank.calculate()

	print '\n\nIndex: \n'
	i = Index( bank )
	i.printIndex()

	s = Scorer( 'tokens', i )
	
	print '\nDokumentenlaenge: \n'
	s.printDocumentLength()
	
	print '\nSuchergebnisse: \n'
	s.printScoring()
	s = Scorer( 'index', i )
	s.printScoring()
	s = Scorer( 'classification', i )
	s.printScoring()
	s = Scorer( 'tokens classification', i )
	s.printScoring()
Example #33
0
    def __init__(self, ai_settings, screen, alien):
        """Spawn a lazer directly below *alien*, moving at the configured speed."""
        super(Lazer, self).__init__()
        self.screen = screen

        # Animation frames come from the shared image index.
        self.images = Index().L1
        self.index = 0
        self.image = self.images[0]

        # Horizontally centred on the alien, starting at its bottom edge.
        self.rect = self.image.get_rect()
        self.rect.centerx = alien.rect.centerx
        self.rect.top = alien.rect.bottom

        self.timer = pygame.time.get_ticks()
        # Track the vertical position as a float for sub-pixel movement.
        self.y = float(self.rect.y)
        self.speed_factor = ai_settings.lazer_speed
Example #34
0
    def __init__(self, ai_settings, screen):
        """Base alien sprite: loads its animation/explosion frames and places
        itself near the top-left corner of the screen."""
        super(Alien, self).__init__()
        self.screen = screen
        self.ai_settings = ai_settings
        self.type = 0

        # Load the alien image, and set its rect attribute.
        # Fixed: the original built a second Index() just for E1 and assigned
        # self.image / self.rect twice with identical values.
        ims = Index()
        self.images = ims.a1
        self.Explode = ims.E1
        self.timer = 0
        self.index = 0
        self.image = self.images[self.index]
        self.rect = self.image.get_rect()

        # Start near the top left, offset by the sprite's own size;
        # track the exact horizontal position as a float.
        self.rect.x = self.rect.width
        self.rect.y = self.rect.height
        self.x = float(self.rect.x)

        self.dead = False
        self.death_index = None
        self.last_frame = None
Example #35
0
 def add_index(self, name=None, kind=None, columns=None, json=None):
     """Attach a new Index to this table.

     kind must be PRIMARY, UNIQUE or INDEX (when given); columns must be a
     subset of the table's columns.  A PRIMARY index always replaces the
     previous one, so at most one primary index exists.
     """
     if kind is not None and kind not in ("PRIMARY", "UNIQUE", "INDEX"):
         raise CSVTableError("Index must be of the following: "
                             "PRIMARY,UNIQUE,INDEX")
     if columns is not None and not set(columns).issubset(self._columns):
         raise CSVTableError("Index must use columns of the "
                             "containing database")

     new_index = Index(name, self, kind, columns, json)
     # make sure primary index is kept singular: it lives under a fixed key
     storage_key = "PRIMARY" if kind == "PRIMARY" else name
     self._indexes[storage_key] = new_index
    def populate_web(self):
        """
        Populates the web and the inverted index keyword
        dictionary with the urls provided
        """
        # keyword -> OccurrenceList of page indices, handed to rank_page below
        occurdic = {}
        for url in self.urls:
            page = scrape(url)
            keywords = get_keywords(page.text)
            # The page's position in self.web doubles as its document id.
            index = len(self.web)
            self.web.append(Index(index, page.title, page.url, page.links_to))

            for word in keywords:
                value = OccurrenceList()
                value.append(index)
                # NOTE(review): this overwrites any earlier OccurrenceList for
                # the same word instead of extending it -- confirm intended.
                occurdic[word.lower()] = value
                self.keywords.add(word.lower(), value)
        self.rank_page(occurdic, len(self.web))
Example #37
0
 def save_text(page_url):
     """Download *page_url*, strip scripts/styles and whitespace, and persist
     the cleaned text via write_a_csv."""
     with urllib.request.urlopen(page_url) as url:
         s = url.read()
     soup = BeautifulSoup(s, "html.parser")

     # Remove non-content elements before extracting text.
     for markup in soup(["script", "style"]):
         markup.extract()
     raw_text = soup.get_text()

     # Normalise whitespace: split into lines, break multi-headlines apart,
     # strip each phrase, and drop blank lines.
     stripped_lines = (line.strip() for line in raw_text.splitlines())
     phrases = (phrase.strip()
                for line in stripped_lines
                for phrase in line.split("  "))
     cleaned = '\n'.join(phrase for phrase in phrases if phrase)

     write_a_csv(Index(cleaned, page_url))
Example #38
0
    def __init__(self, ai_settings, screen):
        """Type-1 alien worth 10 points; spawns near the top-left corner."""
        Alien.__init__(self, ai_settings, screen)
        self.type = 1
        self.screen = screen
        self.ai_settings = ai_settings
        self.value = 10

        # Animation frames from the shared image index; start on frame 0.
        self.images = Index().a1
        self.index = 0
        self.image = self.images[0]
        self.rect = self.image.get_rect()
        self.timer = pygame.time.get_ticks()

        # Start each new alien near the top left of the screen,
        # offset by its own width/height.
        self.rect.x = self.rect.width
        self.rect.y = self.rect.height

        # Store the alien's exact position as a float.
        self.x = float(self.rect.x)
Example #39
0
	def __init__(self, host, port, server_key, authorized_keys, host_keys=None, incoming=None):
		"""Prepare the listening socket, SSH identity, index and responder.

		host/port       -- address the server socket is bound to
		server_key      -- path to this server's RSA private key
		authorized_keys -- keys accepted from registering clients
		host_keys       -- optional known-hosts file for outgoing connections
		incoming        -- optional shared message queue; created when omitted

		Nothing is started here; run()/start() begin the actual work.
		"""
		threading.Thread.__init__(self)
		self.server_socket = self.create_socket(host, port)
		self.incoming = (incoming if incoming else queue.Queue())
		self.private_key = paramiko.RSAKey(filename=server_key)
		self.authorized_keys = authorized_keys
		self.host_keys = host_keys
		self.selector = selectors.DefaultSelector()
		self.num_conn = 0  # running connection count; doubles as the next channel id
		self.stop_event = threading.Event()
		self.index = Index()
		# Filesystem observer
		self.observer = Observer()
		# Server functionality
		self.channels = {}  # channel_id -> channel
		self.paramiko_server = SSHServer(self.authorized_keys)
		# Client functionality
		self.clients = []
		self.to_watch = queue.Queue() # ???
		# Responder threads
		handler = FileSystemEventHandler(self.index, self.incoming)
		self.responder = Responder(index=self.index, channels=self.channels, incoming=self.incoming, observer=self.observer, handler=handler)
Example #40
0
    def __init__(self, ai_settings, screen):
        """Type-4 alien; starts inactive (self.on False) near the top-left corner."""
        Alien.__init__(self, ai_settings, screen)
        self.type = 4
        self.screen = screen
        self.ai_settings = ai_settings
        self.on = False

        # Load the alien image, and set its rect attribute.
        self.images = Index().a4
        self.index = 0
        self.image = self.images[0]

        self.rect = self.image.get_rect()
        self.timer = pygame.time.get_ticks()

        # Start each new alien near the top left of the screen,
        # offset by its own width/height.
        self.rect.x = self.rect.width
        self.rect.y = self.rect.height

        # Store the alien's exact position as a float.
        self.x = float(self.rect.x)
Example #41
0
    def _build_commands(self, index: Index) -> list:
        """Build one compiler command line per script that needs compiling.

        Scripts that the indexer reports as unchanged are skipped, unless the
        indexer is disabled in the project options.  Returns the list of fully
        quoted command strings.
        """
        commands: list = []

        unique_imports = self._get_imports_from_script_paths()
        script_paths = self.get_script_paths()

        # The Release/Final/Optimize flags do not depend on the script, so
        # resolve them once instead of re-reading the project node per script.
        extra_flags: list = []
        if self.project.is_fallout4:
            release = self.root_node.get('Release')
            if release and release.casefold() == 'true':
                extra_flags.append('-release')

            final = self.root_node.get('Final')
            if final and final.casefold() == 'true':
                extra_flags.append('-final')

        optimize = self.root_node.get('Optimize')
        if optimize and optimize.casefold() == 'true':
            extra_flags.append('-op')

        arguments = Arguments()

        for script_path in script_paths:
            # Skip unchanged scripts (only when the indexer is enabled).
            if not self.project.options.disable_indexer and index.compare(script_path):
                continue

            arguments.clear()
            arguments.append_quoted(self.compiler_path)
            arguments.append_quoted(script_path)
            arguments.append_quoted(self.output_path, 'o')
            arguments.append_quoted(';'.join(unique_imports), 'i')
            arguments.append_quoted(self.flags_path, 'f')
            for flag in extra_flags:
                arguments.append(flag)

            commands.append(arguments.join())

        return commands
def runner(
        PATH_DATA,
        RATIO_TEST_DATA,
        RATIO_SPECIFICITY,
        RATIO_CONFIDENCE,
        EXPERIMENTS,
        fe,
        setting_name
    ):
    """Run EXPERIMENTS train/test rounds and write results/errors to CSV files.

    PATH_DATA         -- dataset location handed to Dataset()
    RATIO_TEST_DATA   -- marked test-set size relative to the train set
    RATIO_SPECIFICITY -- specificity parameter forwarded to bow()
    RATIO_CONFIDENCE  -- minimum similarity/expectation ratio to accept a match
    EXPERIMENTS       -- number of experiment repetitions
    fe                -- feature extractor forwarded to bow()
    setting_name      -- tag embedded in the output CSV file names

    Returns the aggregated summary row, or None when no experiment produced
    results.  (Python 2 code: uses print statements and list-returning zip().)
    """

    results = []
    errors = Counter()        # (kind, guess, label) -> count of fp/fn errors
    qtypes = QuestionTypes()
    for e in range(1, EXPERIMENTS + 1):

        start = time.time()
        dataset = Dataset(PATH_DATA)
        dataset.load()

        invprob = InverseProbabilities(dataset)
        index = Index(invprob)

        train = [
    #         (bow(fe, label, RATIO_SPECIFICITY, prob_filter=invprob) + bow(fe, text, prob_filter=invprob), label, mark)
            (bow(fe, text, RATIO_SPECIFICITY, prob_filter=invprob), label, mark)
            for text, label, mark in dataset.train()
        ]
        # Oversample the training data fourfold.
        train = train * 4

        # NOTE(review): test bags are built from `label`, not `text` (the text
        # variant is commented out) -- confirm this is intentional.
        test = [
            (bow(fe, label, RATIO_SPECIFICITY, prob_filter=invprob), label, mark)
#             (bow(fe, text, RATIO_SPECIFICITY, prob_filter=invprob), label, mark)
            for text, label, mark in dataset.test()
            if mark
        ][:int(len(train) * RATIO_TEST_DATA)]

        # Balance with an equal number of unmarked (negative) test items.
        test += [
            (bow(fe, label, RATIO_SPECIFICITY, prob_filter=invprob), label, mark)
#             (bow(fe, text, RATIO_SPECIFICITY, prob_filter=invprob), label, mark)
            for text, label, mark in dataset.test()
            if not mark
        ][:len(test)]

        # Populate the index from the training bags.
        for tbow, label, mark in train:
            index.update(tbow)
            index.add(label)

        tp, tn, fp, fn, prec, rec, f, duration = 0, 0, 0, 0, 0.0, 0.0, 0.0, 0.0
        marked = sum([1 for _, _, mark in test if mark])
        for tbow, label, mark in test:
            qtypes.increment(label)
            # Expected inverse-probability mass of the gold label's bag; used
            # to normalise the similarity of the best match below.
            expectation = sum([
                invprob[w]
                for w in set(bow(fe, label, RATIO_SPECIFICITY, prob_filter=invprob))
            ])
            matches = index(tbow)

            if not matches and not mark:
                tn += 1
                continue
            elif not matches and mark:
                fn += 1
                errors[('fn', '', label)] += 1
                qtypes.update('fn', None, label)
                continue

            # matches[i] is (similarity, ?, guessed label) -- see usage below.
            best_match = matches[0]
            guess = best_match[2]
            sim = best_match[0]
            # +0.1 guards against division by zero for empty bags.
            ratio = sim / (expectation + 0.1)

            if ratio <= RATIO_CONFIDENCE:
                if not mark:
                    tn += 1
                    continue
                else:
                    fn += 1
                    errors[('fn', '', label)] += 1
                    qtypes.update('fn', None, label)
                    continue
            else:
                if mark and guess == label:
                    tp += 1
                else:
                    fp += 1
                    _qtype = '_'.join(guess.lower().split()[:2])
                    errors[('fp', guess, label)] += 1
                    qtypes.update('fp', guess, label)

            # Metrics are recomputed after every scored item; the final
            # iteration's values end up in the result vector.
            duration = time.time() - start
            if tp:
                prec = tp / float(tp + fp)
                rec = tp / float(tp + fn)
                f = f1(prec, rec)
            else:
                prec, rec, f = 0.0, 0.0, 0.0

        vector = (e, _r(tp), _r(tn), _r(fp), _r(fn),
                  _r(prec), _r(rec), _r(f), _r(duration))
        results.append(vector)

        # Python 2: zip() returns a list, so it can be sliced directly.
        # [-4:-1] selects the prec/rec/f columns of the transposed results.
        print '%d, tp: %d, tn: %d, fp: %d, fn: %d, all: %d, prec: %.2f, rec: %.2f, f1: %.2f, time=%.2f' % (e, tp, tn, fp, fn, sum([tp, tn, fp, fn]), prec, rec, f, duration)
        precs, recs, fs = zip(*results)[-4:-1]
        print e, avg(precs), avg(recs), avg(fs)
        print '---'

    if not results:
        return None

    # NOTE(review): cols[:4] includes the experiment-number column `e` in the
    # integer averages -- verify the intended split is cols[1:5] / cols[5:].
    cols = columns(results)
    columns_int = [avg(col) for col in cols[:4]]
    columns_float = [_r(avg(col)) for col in cols[4:]]
    summary_row = [
        tuple(['all'] + columns_int + columns_float)
    ]

    create_folder(RESULTS_FOLDER)
    to_csv(
        RESULTS_KEYS + results + summary_row,
        '%ssecond_task.%s.results.csv' % (RESULTS_FOLDER, setting_name)
    )

    to_csv(
        [tuple([f] + list(key)) for key, f in errors.most_common()],
        '%ssecond_task.%s.errors.csv' % (RESULTS_FOLDER, setting_name)
    )

    to_csv(
        qtypes.dump(),
        '%ssecond_task.error.%s.question_types.csv' % (RESULTS_FOLDER, setting_name)
    )

    return summary_row[0]
Example #43
0
 def init(self):
     """Configure this diagnostic as the 'info' channel, disabled by default."""
     Index.init(self, "info", False)
class QueryManagerITC:
    """Ranked (tf-idf) query execution over a cached index with DB fallback.

    Posting lists are looked up in an in-memory cache first and in the
    database second; documents containing every query term are scored by
    cosine-normalised tf-idf and returned best-first.
    """

    def __init__(self, db, cdb):
        """Warm the cache index from *cdb* and keep *db* for fallback lookups."""
        self.cacheIndex = Index()
        self.cacheIndex.createFromCursor(cdb.findAll())
        self.db = db
        # Corpus size used for idf -- hard-coded; TODO confirm it matches cdb.
        self.numDocs = 127000

    def toArray(self, query):
        """Split the query string into space-separated terms."""
        return query.split(' ')

    def getArray(self, word):
        """Posting list for *word*: cache first, then DB, else empty list."""
        arr = self.cacheIndex.find(word)
        if not arr:
            arr = self.db.find(word)
        if not arr:
            arr = []
        return arr

    def intersect(self, lstA, lstB):
        """Intersect two ascending doc-id lists.

        An empty list acts as the identity: the other list is returned
        unchanged.  NOTE: a None sentinel is appended to both inputs,
        mutating them, so the merge loop needs no bounds checks.
        (Fixed: removed two unreachable `elif lstA/lstB is None` branches --
        the ==/</> tests above them are exhaustive for comparable ids.)
        """
        if not lstA and not lstB:
            return []

        if not lstA:
            return lstB
        if not lstB:
            return lstA

        result = []
        i = 0
        j = 0
        lstA.append(None)
        lstB.append(None)
        while lstA[i] is not None and lstB[j] is not None:
            if lstA[i] == lstB[j]:
                result.append(lstA[i])
                j += 1
                i += 1
            elif lstA[i] < lstB[j]:
                i += 1
            elif lstA[i] > lstB[j]:
                j += 1

        return result

    def execute(self, query):
        """Run *query*: score the docs containing all terms, best first.

        Returns (doc_id, score) pairs sorted by descending score.
        """
        qArr = self.toArray(query)
        colRes = {}
        result = []
        wt = {}
        val = 0

        # Posting lists for every term (cache -> DB -> []).
        for word in qArr:
            colRes[word] = self.getArray(word)

        qw = WordsList(1)
        qw.insertList(qArr)

        # Query-side tf-idf weights.
        # NOTE(review): an unknown term yields len(colRes[key]) == 0 and a
        # ZeroDivisionError here -- confirm upstream guarantees postings exist.
        for key in qw.list():
            tf = round(1 + math.log10(qw.getWord(key)), 2)
            idf = round(math.log10(self.numDocs/len(colRes[key])), 2)
            wt[key] = tf*idf
            val += tf*idf*tf*idf

        # Cosine-normalise the query weights.
        val = math.sqrt(val)
        for key in qw.list():
            wt[key] = wt[key]/val

        # Doc ids present in every term's posting list
        # (postings are (doc_id, weight) pairs; element [0] is the id).
        arrIntersect = []
        for i in range(len(qArr)):
            tmpArr = []
            for j in range(len(colRes[qArr[i]])):
                tmpArr.append(colRes[qArr[i]][j][0])

            arrIntersect = self.intersect(tmpArr, arrIntersect)

        arrIntersect.append(None)  # sentinel for the merges below

        # Restrict each term's postings to the intersected doc ids.
        dictIntersect = {}
        for word in qArr:
            niza = colRes[word]
            dictIntersect[word] = []
            i = 0
            j = 0
            while arrIntersect[j] is not None:
                if niza[i][0] == arrIntersect[j]:
                    dictIntersect[word].append(niza[i])
                    i += 1
                    j += 1
                elif niza[i][0] > arrIntersect[j]:
                    j += 1
                elif niza[i][0] < arrIntersect[j]:
                    i += 1

        # Score each intersected doc: sum of doc weight * query weight.
        for i in range(len(arrIntersect) - 1):
            value = 0
            for key in dictIntersect.keys():
                value += dictIntersect[key][i][1] * wt[key]
            result.append((arrIntersect[i], round(value, 2)))

        return sorted(result, key=itemgetter(1), reverse=True)
Example #45
0
	query = [x.rstrip() for x in open(args["query"][0])]
else:
	query = args['query']
# Remaining command-line options (args parsed upstream, outside this view).
large = args['large']          # presumably: close each index after use -- see loop below
feature = args["feature"]
typepost = args["typepost"]
lstparam = args["postparam"]

req = Requete()                # query engine, reused across all indexes
restotal = []                  # accumulated [index, results, query] triples
nbrestotal = 0                 # total number of hits across all queries
try:
	for f in index:
		if verb:
			sys.stderr.write(f+'\n')
		idx = Index(f,"",verb)
		idx.lectureBase()
		if verb:
			sys.stderr.write('fin initialisation base\n')
		for q in query:
			req.putIndex(idx)
			req.putRequete(q)
			if verb:
				sys.stderr.write("Requête : "+q+"\n")
			res = req.calculRequete()
			nbrestotal += len(res)
			if verb:
				sys.stderr.write('resultat : '+str(len(res))+'\n')
			restotal.append([idx,res,q])
		if large:
			idx.close()
Example #46
0
class Program:
    """Analyse btrfs subvolume snapshots stored as lz4-compressed dumps in .btrfs/."""

    def __init__(self):
        self.index = Index()
        self.folder = ".btrfs"  # directory holding the lz4-compressed snapshot dumps
        # Parses btrfs allocation lines: inode, file offset, len, disk start,
        # offset, gen, flags, and the remaining path/extra text.
        self.matchAllocation = re.compile("^inode (\d+) file offset (\d+) len (\d+) disk start (\d+) offset (\d+) gen (\d+) flags (\w+) (.*)$")
        self.subvolume = Subvolume()

    def start(self):
        """Create and populate the index."""
        self.index.create()
        self.index.scan()

    def logic(self, list):
        """Print the total size of the unique (by sha1) files in the given snapshots."""
        print list
        sum = 0                 # note: shadows the builtin sum() within this method
        lookup = {}             # sha1 -> True, de-duplicates files across snapshots
        for item in list:
            # Snapshot dumps are named by the sha1 of the snapshot identifier.
            hex = hashlib.sha1(item).hexdigest()
            file = self.folder + "/" + hex + ".lz4"

            print "processing: " + file
            with open(file, "r") as fd:
                compressed = fd.read()
                decompressed = lz4.decompress(compressed)
                lines = decompressed.split('\n')

                for line in lines:
                    # note: `file` is reused here for the parsed Entry
                    file = Entry(line)

                    if file.sha1 in lookup:
                        continue

                    lookup[file.sha1] = True
                    sum += file.size

        print "Accumulated size is: ", self.printSize(sum)

    def remove(self, list):
        """Print how much space removing the given snapshots would free."""
        omega, crap = self.buildLookup(self.subvolume.listSubvolumes())
        exclude, files = self.buildLookup(list)
        result = self.blabla(omega, exclude, files)

        print "Accumulated freed size is: ", self.printSize(result)
        #print "Files to remove: ", result

    def blabla(self, omega, exclude, files):
        """Sum the sizes of files whose global refcount (omega) equals their
        refcount within the removal set (exclude) -- i.e. files referenced
        only by the snapshots being removed."""
        sum = 0

        for key in exclude:
            if omega[key] == exclude[key]:
                sum += files[key].size
                pass

        return sum

    def buildLookup(self, subvolumes):
        """Return (sha1 -> reference count, sha1 -> Entry) over the given snapshots."""
        lookup = {}
        files = {}

        for subvolume in subvolumes:
            print "* " + subvolume
            hex = hashlib.sha1(subvolume).hexdigest()
            file = self.folder + "/" + hex + ".lz4"

            with open(file, "r") as fd:
                compressed = fd.read()
                decompressed = lz4.decompress(compressed)
                lines = decompressed.split('\n')

                for line in lines:
                    entry = Entry(line)
                    files[entry.sha1] = entry

                    if entry.sha1 in lookup:
                        lookup[entry.sha1] += 1
                    else:
                        lookup[entry.sha1] = 1

        return lookup, files

    def printSize(self, size):
        """Format *size* (bytes) as a human-readable gb/mb/kb/b string."""
        kb = 1024
        mb = kb*kb
        gb = kb*mb
        resolution = 3

        if (size >= gb):
            return str(round(size / float(gb), resolution)) + " gb"
        elif (size >= mb):
            return str(round(size / float(mb), resolution)) + " mb"
        elif (size >= kb):
            return str(round(size / float(kb), resolution)) + " kb"

        return str(size) + " b"

    def indexAll(self, param):
        """Build a word -> path lookup from every subvolume entry, then run a
        sample search for the word 'review'.  (note: `str` shadows the builtin)"""
        wordlist = Wordlist()
        lookup = Lookup()

        for key in self.subvolume.listSubvolumes():
            print key
            for file in self.subvolume.read(key):
                e = Entry(file)
                for str in re.split("[^a-z0-9]*", e.path.lower()):
                    ix = wordlist.add(str)
                    lookup.add(ix, e.path)


            lookup.search(wordlist.add("review"))
Example #47
0
 def __init__(self):
     """Set up the index, snapshot folder and the btrfs allocation-line parser."""
     self.index = Index()
     self.folder = ".btrfs"  # directory holding the lz4-compressed snapshot dumps
     # Parses btrfs allocation dump lines: inode, offsets, length, gen, flags, path.
     self.matchAllocation = re.compile("^inode (\d+) file offset (\d+) len (\d+) disk start (\d+) offset (\d+) gen (\d+) flags (\w+) (.*)$")
     self.subvolume = Subvolume()
class QueryManager:
    """Boolean (AND/OR, parenthesised) query execution over a cached index
    with database fallback.

    Queries are tokenised, turned into an OR/AND expression tree and
    evaluated recursively; posting lists come from the in-memory cache
    first and the database second.
    """

    def __init__(self, db, cdb):
        """Warm the cache index from *cdb* and keep *db* for fallback lookups."""
        self.cacheIndex = Index()
        self.cacheIndex.createFromCursor(cdb.findAll())
        self.db = db

    def toArray(self, query):
        """Tokenise *query*: '(' and ')' become their own tokens, whitespace
        separates terms.  (Fixed: removed unused locals and an always-true
        `i < len(query)` guard.)"""
        strArr = []
        tmpStr = ''
        for ch in query:
            if ch == '(' or ch == ')':
                if tmpStr != '':
                    strArr.append(tmpStr)
                    tmpStr = ''
                strArr.append(ch)
            elif ch == ' ':
                if tmpStr != '':
                    strArr.append(tmpStr)
                    tmpStr = ''
            else:
                tmpStr += ch
        if tmpStr != '':
            strArr.append(tmpStr)
        return strArr

    def _resolve(self, term):
        """Posting list for *term*: cache first, then DB; raise when absent.

        Shared by andQ/orQ (the original duplicated this lookup four times).
        """
        lst = self.cacheIndex.find(term)
        if not lst:
            lst = self.db.find(term)
        if not lst:
            raise Exception('Зборот "' + term + '" не постои.')
        return lst

    def andQ(self, A, B):
        """Intersection of the posting lists for A and B (term or list).

        NOTE: appends a None sentinel to both lists, mutating them.
        (Fixed: removed unreachable `return None` statements after raises.)
        """
        lstA = A
        lstB = B

        if not A or not B:
            raise Exception('Грешка.')

        if not isinstance(A, list):
            lstA = self._resolve(A)
        if not isinstance(B, list):
            lstB = self._resolve(B)

        result = []
        i = 0
        j = 0
        # None sentinels terminate the merge without bounds checks.
        lstA.append(None)
        lstB.append(None)

        while lstA[i] is not None and lstB[j] is not None:
            if lstA[i] == lstB[j]:
                result.append(lstA[i])
                j += 1
                i += 1
            elif lstA[i] < lstB[j]:
                i += 1
            elif lstA[i] > lstB[j]:
                j += 1

        return result

    def orQ(self, A, B):
        """Union of the posting lists for A and B (term or list).

        NOTE: set() drops duplicates and any ordering of the inputs.
        """
        lstA = A if isinstance(A, list) else self._resolve(A)
        lstB = B if isinstance(B, list) else self._resolve(B)
        return list(set(lstA + lstB))

    def buildTree(self, query):
        """Build the OR/AND expression tree for the token list."""
        tree = Tree.buildOrTree(query)
        return tree

    def execute(self, query):
        """Evaluate *query*: single terms bypass the tree; otherwise the
        expression tree is built and evaluated recursively."""
        qArr = self.toArray(query)
        if len(qArr) == 1:
            lst = self.cacheIndex.find(qArr[0])
            if not lst:
                lst = self.db.find(qArr[0])
            return lst
        qTree = self.buildTree(qArr)
        result = self.executeQuery(qTree)
        return result

    def executeQuery(self, tree):
        """Recursively evaluate an OR/AND tree node; None on error or for
        nodes without two children (leaf handling happens in andQ/orQ,
        which accept raw terms)."""
        lnode = tree.left
        rnode = tree.right

        if lnode and rnode:
            if tree.value == 'OR':
                try:
                    a = self.executeQuery(lnode)
                    b = self.executeQuery(rnode)
                    return self.orQ(a, b)
                except Exception as e:
                    # .message exists on Python 2 exceptions only; use str(e) on Python 3.
                    print(e.message)
                    return None
            elif tree.value == 'AND':
                try:
                    a = self.executeQuery(lnode)
                    b = self.executeQuery(rnode)
                    return self.andQ(a, b)
                except Exception as e:
                    print(e.message)
                    return None
Example #49
0
else:
	ficlog = open(log,"w")
# Optional transduction rules: load every non-comment line of each rule file.
trans = None
if rules:
	trans = Transduction()
	for elt in rules:
		if os.path.isfile(elt):
			for r in open(elt):
				if r[0] != "#":
					r = r.rstrip()
					if verb:
						ficlog.write('ajout de la règle : '+r+'\n')
					trans.addRules(r)
# Index every input file: create the base, tokenise (txt or xml mode),
# apply the optional transduction rules, then persist and finalise.
for file in input:
	if verb:
		ficlog.write(file+'\n')
	idx = Index(file,database,verb,ficlog)
	idx.initDB()
	idx.initFicDocument()
	idx.createBase(listfeature)
	if not xml:
		# plain-text mode needs the tokeniser dictionaries
		idx.initTokenizer('txt',dicts,'dico',dictc)
	else:
		idx.initTokenizer('xml','')
	idx.indexTexte(trans)
	idx.sauveBase()
	idx.renameFicDocument()
	idx.closeBase()
	idx.createMeta()

Example #50
0
    def run(self, clean_first):
        """Index every XML file under input_folder into Solr, then verify the count.

        When clean_first is True the index is wiped before submission.  A
        mismatch between local file count and the index count is logged.
        """
        idx = Index(self.solr_service, self.site, "site_code:%s" % self.site)

        # if clean_first is True then wipe the index
        if clean_first:
            idx.clean()
            idx.commit()

        total = 0
        for dirpath, dirnames, filenames in os.walk(self.input_folder):
            for name in filenames:
                file_handle = os.path.join(dirpath, name)
                doc = etree.parse(file_handle)
                idx.submit(etree.tostring(doc), file_handle)
            total += len(filenames)

        idx.commit()
        idx.optimize()

        # compare the local file count to what's in the index
        #  log an error if not the same
        url = "%s/select?q=site_code:%s&rows=0&wt=json" % (self.solr_service, self.site)
        data = json.loads(requests.get(url).text)
        if total != data['response']['numFound']:
            log.error("Number of files in index doesn't match local count. index: %s local: %s" % (data['response']['numFound'], total))
 def __init__(self, db, cdb):
     """Warm the in-memory cache index from *cdb* and keep *db* for fallback lookups."""
     self.cacheIndex = Index()
     self.cacheIndex.createFromCursor(cdb.findAll())
     self.db = db
Example #52
0
from RequeteIndex import RequeteIndex as Requete
from Post import Post
from Index import Index
from Dico import Dico


if dic != "":
	dictionnaire = Dico()
	dictionnaire.load([dic])
else:
	dictionnaire = None
tidx = []
for index in lstindex:
	if verb:
		print(index)
	idx = Index(index,"",verb)
	idx.lectureBase()
	tidx.append(idx)

recom = re.compile('([A-Z]+[a-z]*) (.*)')
reconf = re.compile('([a-z]+) ([0-9]+)')

conf = {"range":3,"taille":100}
# Classe Serveur
class Server(socketserver.BaseRequestHandler):
	
	def handle_timeout(self):
		print("timeout")
	
	def handle(self):
		global tidx
Example #53
0
 def init(self):
     """Configure this diagnostic as the 'debug' channel, disabled by default."""
     Index.init(self, "debug", False)
Example #54
0
import pyjd # this is dummy in pyjs.
import pygwt

from Index import Index

if __name__ == '__main__':
	# Bootstrap the pyjd/pygwt environment, then hand control to the Index app.
	pyjd.setup("public/Main.html")
	app = Index()
	app.onModuleLoad()
	pyjd.run()


Example #55
0
 def init(self):
     """'info' channel, off by default; route state through the proxy when one exists."""
     Index.init(self, "info", False)
     if journal.hasProxy:
         self._stateFactory = self._proxyState
Example #56
0
 def init(self):
     """Configure this diagnostic as the 'error' channel, enabled by default."""
     Index.init(self, "error", True)
Example #57
0
 def init(self):
     """'warning' channel, on by default; route state through the proxy when one exists."""
     Index.init(self, "warning", True)
     if journal.hasProxy:
         self._stateFactory = self._proxyState
Example #58
0
class Registrar(threading.Thread):
	"""Threaded SSH hub: accepts peer registrations, relays their messages
	onto the incoming queue, and watches outgoing client channels."""

	def __init__(self, host, port, server_key, authorized_keys, host_keys=None, incoming=None):
		"""Prepare sockets, keys, index and the responder thread (nothing starts yet)."""
		threading.Thread.__init__(self)
		self.server_socket = self.create_socket(host, port)
		self.incoming = (incoming if incoming else queue.Queue())
		self.private_key = paramiko.RSAKey(filename=server_key)
		self.authorized_keys = authorized_keys
		self.host_keys = host_keys
		self.selector = selectors.DefaultSelector()
		self.num_conn = 0  # running connection count; doubles as the next channel id
		self.stop_event = threading.Event()
		self.index = Index()
		# Filesystem observer
		self.observer = Observer()
		# Server functionality
		self.channels = {}  # channel_id -> channel
		self.paramiko_server = SSHServer(self.authorized_keys)
		# Client functionality
		self.clients = []
		self.to_watch = queue.Queue() # ???
		# Responder threads
		handler = FileSystemEventHandler(self.index, self.incoming)
		self.responder = Responder(index=self.index, channels=self.channels, incoming=self.incoming, observer=self.observer, handler=handler)

	def connect(self, host, port):
		"""Open an outgoing SSH session to host:port and queue its channel for observation."""
		client = paramiko.SSHClient()
		client.load_system_host_keys(self.host_keys)
		client.set_missing_host_key_policy(paramiko.WarningPolicy())
		client.connect(host, port=port, pkey=self.private_key)
		transport = client.get_transport()
		channel = transport.open_channel(kind="session")
		# Keep a reference to the client so the session stays alive.
		self.clients.append(client)
		self.to_watch.put(channel)
		return channel

	def get_incoming(self):
		"""Queue of ("remote", channel_id, data) tuples received from peers."""
		return self.incoming

	def run(self):
		"""Main loop: accept registrations, relay messages, watch new client channels."""
		self.server_socket.listen(10)
		self.selector.register(self.server_socket, selectors.EVENT_READ)
		self.responder.start()
		self.observer.start()
		print("[Server] All set, listening for SSH connections.")

		while not self.stop_event.is_set():
			events = self.selector.select(timeout=1)

			# 1 - Handle incoming client registrations, messages and disconnects
			for key, event in events:
				channel = key.fileobj
				# 1.1 - New registration
				if channel is self.server_socket:
					client_socket, address = self.server_socket.accept()
					client_channel = self.negotiate_channel(client_socket)
					if not client_channel:
						continue
					# Successful negotiation
					print("[Server] Now have secure channel with " + str(address))
					self.register_channel(client_channel)
				else:
					channel_id = key.data["channel_id"]
					try:
						# Fixed: was `1024 ^ 2` (bitwise XOR == 1026 bytes);
						# a 1 MiB receive buffer was clearly intended.
						databin = channel.recv(1024 ** 2)
						# 1.2 - Client disconnection
						if not databin:
							print("[Server] Disconnection")
							self.remove_channel(channel, channel_id)
						# 1.3 - Client message
						else:
							# NOTE: pickle.loads on untrusted network data can
							# execute arbitrary code -- consider a safer format.
							data = pickle.loads(databin)
							self.incoming.put((
								"remote",
								channel_id,
								data))
							print("[Server] Received: " + str(data))
					except socket.error:
						self.remove_channel(channel, channel_id)

			# 2 - Register connected server channels for observation
			while True:
				try:
					server_channel = self.to_watch.get(block=False)
					self.register_channel(server_channel)
				except queue.Empty:
					break
		print("[Registrar] Stopping")

	def register_channel(self, channel):
		"""Assign the channel a fresh id and start watching it for reads."""
		self.num_conn += 1
		self.index.add_channel(self.num_conn)
		self.channels[self.num_conn] = channel
		self.selector.register(channel, selectors.EVENT_READ, data={"channel_id": self.num_conn})
		print("Added channel #"+str(self.num_conn))

	def remove_channel(self, channel, channel_id):
		"""Stop watching a channel and drop it from the index.

		NOTE(review): the entry in self.channels is not deleted here --
		confirm whether retaining it is intentional.
		"""
		self.selector.unregister(channel)
		self.index.remove_channel(channel_id)

	def negotiate_channel(self, client_socket):
		"""Run the server-side SSH handshake; returns a channel, or None after 20s."""
		handler = paramiko.Transport(client_socket)
		handler.add_server_key(self.private_key)
		handler.start_server(server=self.paramiko_server)
		return handler.accept(20)

	def create_socket(self, address, port):
		"""Create a reusable TCP socket bound to (address, port); listening starts in run()."""
		server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
		server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
		server_socket.bind((address, port))
		return server_socket

	def stop(self):
		"""Signal the run loop to exit and stop the responder thread."""
		self.stop_event.set()
		self.responder.stop()
Example #59
0
 def init(self):
     """'firewall' channel: enabled by default and fatal; proxy-aware state factory."""
     Index.init(self, "firewall", defaultState=True, fatal=True)
     if journal.hasProxy:
         self._stateFactory = self._proxyState
Example #60
0
# Output separators: real tab/newline, or empty strings when nosep is set.
tabul = '\t'
nl = '\n'
if nosep:
	tabul = ''
	nl = ''

# path to library
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../Corpindex")

from Index import Index

if verb:
	sys.stderr.write(index)

for elt in index:
	idx = Index(elt,"")
	idx.lectureBase()
	op = ""
	div = ""
	if output == "xml":
		print("<text>",end=nl)
		if ident != "":
			print('<div id="'+ident+'">',end=nl)
	elif output == "json":
		print("[")
	for tok in idx.getIndexTokens():
		if output == "json":
			print(tok.getJson(),",",end=nl)
		elif output == "txt":
			op += tok.getFeat("f")+" "
			if tok.getFeat("f") in [".",";","?","!"]: