コード例 #1
0
ファイル: kafka.py プロジェクト: xrodneylee/kafka-agent
 def create_consumer_acl(self, username, topic):
     """Add a consumer ACL for ``topic`` through ``self._exec_cmd``.

     Args:
         username: Name of the user the ACL is meant for (see the review
             note below: the visible principal template does not use it).
         topic: Topic the consumer permission is granted on.

     Returns:
         The stdout value returned by ``self._exec_cmd``.
     """
     import shlex  # local import keeps this block self-contained

     # NOTE(review): this literal contains no ``{username}`` placeholder, so
     # ``username`` is currently ignored. It looks like a redacted template
     # (Kafka principals are normally written ``User:<name>``) -- confirm
     # and restore the intended format.
     user = '******'.format(username=username)
     # The command is built as a single string; quote the caller-supplied
     # values so shell metacharacters in them are passed literally instead
     # of being interpreted.
     cmd = ('--authorizer-properties zookeeper.connect=localhost:2181 --add '
            '--allow-principal {user} --consumer --group=* '
            '--topic {topic}').format(
                user=shlex.quote(user), topic=shlex.quote(str(topic)))
     _, stdout = self._exec_cmd(cmd)
     LOG.info('[create_consumer_acl] {stdout}'.format(stdout=stdout))
     return stdout
コード例 #2
0
ファイル: kafka.py プロジェクト: xrodneylee/kafka-agent
 def create_user(self, username, password):
     """Create or update SCRAM credentials for ``username``.

     Builds an ``--alter --add-config`` command that sets both the
     SCRAM-SHA-256 and SCRAM-SHA-512 passwords and runs it through
     ``self._exec_cmd``.

     Args:
         username: Entity name of the user to configure.
         password: Password stored for both SCRAM mechanisms.

     Returns:
         The stdout value returned by ``self._exec_cmd``.
     """
     import shlex  # local import keeps this block self-contained

     scram_config = ('SCRAM-SHA-256=[password={password}],'
                     'SCRAM-SHA-512=[password={password}]').format(
                         password=password)
     # shlex.quote yields the same single-quoted token that used to be built
     # by hand, but also escapes any single quote inside the password, so the
     # password can no longer terminate the quoting and alter the command.
     pwd = shlex.quote(scram_config)
     # NOTE(review): the command string carries the password; any logging of
     # the full command downstream will expose it -- confirm that is
     # acceptable.
     cmd = "--zookeeper localhost:2181 --alter --add-config {pwd} --entity-type users --entity-name {username}".format(
         pwd=pwd, username=shlex.quote(str(username)))
     _, stdout = self._exec_cmd(cmd)
     LOG.info('[create_user] {stdout}'.format(stdout=stdout))
     return stdout
コード例 #3
0
ファイル: kafka.py プロジェクト: xrodneylee/kafka-agent
 def create_topic(self, topic_name, replication_factor, partitions):
     """Create a topic through ``self._exec_cmd``.

     Args:
         topic_name: Name of the topic to create.
         replication_factor: Value passed to ``--replication-factor``.
         partitions: Value passed to ``--partitions``.

     Returns:
         The stdout value returned by ``self._exec_cmd``.
     """
     import shlex  # local import keeps this block self-contained

     # Every value is caller-supplied and ends up in one command string, so
     # each is quoted; plain names and numbers are left unchanged by quote().
     cmd = '--create --zookeeper localhost:2181 --replication-factor {replication_factor} --partitions {partitions} --topic {topic_name}'.format(
         topic_name=shlex.quote(str(topic_name)),
         replication_factor=shlex.quote(str(replication_factor)),
         partitions=shlex.quote(str(partitions)))
     _, stdout = self._exec_cmd(cmd)
     LOG.info('[create_topic] {stdout}'.format(stdout=stdout))
     return stdout
コード例 #4
0
    def get_verse_alignment_mp(self, verse_nums, edition_pairs):
        """Collect verse alignments for several edition pairs in one call.

        For every ``(edition_1, edition_2)`` pair one tuple
        ``(edition_1, edition_2, aligns)`` is appended to the result:

        * editions of the same language get an empty ``aligns`` dict;
        * pairs where both editions are in ``self.bert_files`` are delegated
          to the parent class' ``get_verse_alignment``;
        * all other pairs are resolved from the data returned by
          ``self.get_alignment`` / ``self.get_index``.

        Args:
            verse_nums: Iterable of verse identifiers to look up.
            edition_pairs: Iterable of ``(edition_1, edition_2)`` pairs.

        Returns:
            List of ``(edition_1, edition_2, aligns)`` tuples in input order.
        """
        results = []
        # Data of the most recently loaded language pair; it is reused as
        # long as consecutive edition pairs map to the same languages.
        loaded_s_lang = loaded_t_lang = None
        index_t = alignments = None

        for edition_1, edition_2 in edition_pairs:
            aligns = {}

            lang_1 = self.get_lang_from_edition(edition_1)
            lang_2 = self.get_lang_from_edition(edition_2)
            if lang_1 == lang_2:
                results.append((edition_1, edition_2, aligns))
                continue

            both_bert = (edition_1 in self.bert_files
                         and edition_2 in self.bert_files)
            if both_bert:
                LOG.info("going to super aglingment for: {}, {}".format(
                    edition_1, edition_2))
                parent_aligns = super().get_verse_alignment(
                    verse_nums, self.lang_prf_map[edition_1],
                    self.lang_prf_map[edition_2])
                results.append((edition_1, edition_2, parent_aligns))
                continue

            LOG.info("getting eflomal aglingment for: {} , {}".format(
                edition_1, edition_2))
            s_lang, t_lang, s_edition, t_edition = self.get_ordered_editions(
                edition_1, edition_2)
            s_lang_file = self.edition_file_mapping[s_edition]
            t_lang_file = self.edition_file_mapping[t_edition]

            # The ordered source edition being the caller's second edition
            # means the stored direction is the reverse of the requested one.
            revert = s_edition == edition_2

            if s_lang != loaded_s_lang or t_lang != loaded_t_lang:
                alignments = self.get_alignment(s_lang, t_lang)
                index_t = self.get_index(s_lang, t_lang)
                loaded_s_lang, loaded_t_lang = s_lang, t_lang

            index = None
            if s_lang_file in index_t and t_lang_file in index_t[s_lang_file]:
                index = index_t[s_lang_file][t_lang_file]

            if index is None:
                LOG.warning("couldn't find index for: " + s_edition + ", " +
                            t_edition)
            else:
                LOG.info(
                    "getting verse, {}, {}, {}, {}, {}, {}, {}, {}".format(
                        edition_1, edition_2, s_lang, t_lang, loaded_s_lang,
                        loaded_t_lang, len(index_t), len(index)))
                aligns.update({
                    verse: self.create_ordered_alignment(
                        alignments, index[verse], revert)
                    for verse in verse_nums if verse in index
                })
                LOG.info("verses got")

            results.append((edition_1, edition_2, aligns))
        return results
コード例 #5
0
 def read_langs_order_file(self):
     """Read ``self.lang_order_file_path`` into a list of stripped lines.

     Returns:
         One entry per line of the file, with surrounding whitespace
         removed. An empty list (plus a logged warning) when the file
         does not exist.
     """
     try:
         with open(self.lang_order_file_path, 'r') as order_file:
             return [entry.strip() for entry in order_file]
     except FileNotFoundError:
         LOG.warning("Langs order file not found")
         return []
コード例 #6
0
 def post(self):
     """Handle a create-user request.

     Parses the request arguments with the module-level ``parser`` and
     forwards ``username`` and ``password`` to
     ``kafka_configs.create_user``.

     Returns:
         Whatever ``kafka_configs.create_user`` returns.
     """
     args = parser.parse_args()
     LOG.info(
         '[request] username={username}'.format(username=args['username']))
     # The credential itself must never reach the log: log files are
     # typically readable by far more people than the secret should be.
     LOG.info('[request] password=<redacted>')
     response = kafka_configs.create_user(args['username'],
                                          args['password'])
     return response
コード例 #7
0
 def create_index_binary_file_if_not_exists(self, lang1, lang2):
     """Pickle the index of a language pair unless the binary file exists.

     The existence check and the write happen while holding the
     module-level ``index_lock``.

     Args:
         lang1: First language of the pair.
         lang2: Second language of the pair.
     """
     index_lock.acquire()
     try:
         binary_path = self.get_index_binary_file_path(lang1, lang2)
         if not os.path.exists(binary_path):
             LOG.info("creating binary index file for {}, {}".format(
                 lang1, lang2))
             ind = self.read_index_file(
                 self.get_index_file_path(lang1, lang2))
             with open(binary_path, 'wb') as of:
                 pickle.dump(ind, of)
     finally:
         # Release even when reading or pickling fails; otherwise every
         # later caller would block on the lock forever.
         index_lock.release()
コード例 #8
0
 def create_align_binary_file_if_not_exists(self, lang1, lang2):
     """Pickle the alignments of a language pair unless the file exists.

     The existence check and the write happen while holding the
     module-level ``alignments_lock``.

     Args:
         lang1: First language of the pair.
         lang2: Second language of the pair.
     """
     alignments_lock.acquire()
     try:
         binary_path = self.get_align_binary_file_path(lang1, lang2)
         if not os.path.exists(binary_path):
             LOG.info("creating binary alignments file for {}, {}".format(
                 lang1, lang2))
             aln = self.read_alignment_file(
                 self.get_align_file_path(lang1, lang2))
             with open(binary_path, 'wb') as of:
                 pickle.dump(aln, of)
     finally:
         # Release even when reading or pickling fails; otherwise every
         # later caller would block on the lock forever.
         alignments_lock.release()
コード例 #9
0
 def read_alignment_file(self, file_path):
     """Load one alignment entry per line of ``file_path``.

     A line may start with an index followed by a tab. In that case only
     the second tab-separated field is kept; a line without a tab is kept
     whole. Fields are not stripped, so a trailing newline is preserved.

     Args:
         file_path: Path of the alignment text file.

     Returns:
         List with one string per line, in file order.
     """
     LOG.info("reading alignment file ({})".format(file_path))
     entries = []
     with open(file_path, 'r') as align_file:
         for raw_line in align_file:
             fields = raw_line.split('\t')
             # Skip the leading index column when there is one.
             entries.append(fields[1] if len(fields) > 1 else fields[0])
     return entries
コード例 #10
0
    def read_index_file(self, file_path):
        """Build a nested lookup from a tab-separated index file.

        Every line must hold exactly three tab-separated fields:
        ``verse``, ``s_file`` and ``t_file``. The result maps
        ``s_file -> t_file -> verse -> zero-based line number``.

        Args:
            file_path: Path of the index text file.

        Returns:
            The nested dictionary described above.

        Raises:
            ValueError: If a line does not split into exactly three fields.
        """
        LOG.info("reading index file ({})".format(file_path))
        lookup = {}
        with open(file_path, 'r') as index_file:
            for line_no, raw_line in enumerate(index_file):
                verse, s_file, t_file = raw_line.strip().split('\t')
                # setup_dict_entry presumably creates the key with the given
                # default when it is missing -- confirm against the helper.
                self.setup_dict_entry(lookup, s_file, {})
                per_source = lookup[s_file]
                self.setup_dict_entry(per_source, t_file, {})
                per_source[t_file][verse] = line_no

        return lookup
コード例 #11
0
 def add_to_index(self, index, key, val, to_send):
     """Insert ``key``/``val`` into a bucket array with linked-list chaining.

     The bucket is ``self.get_hash(key) % self.index_size``. An empty
     bucket receives the new node directly; otherwise the node is appended
     to the end of the chain that starts in that bucket.

     Args:
         index: Indexable container of buckets (``None`` or a node dict).
         key: Key stored in the new node.
         val: Value stored in the new node.
         to_send: When not ``None`` and the bucket is occupied, a log line
             with the key, hash, bucket and this value is emitted.

     Returns:
         Number of ``next`` links followed before the node was attached
         (0 when the bucket was empty or held a single node).
     """
     loc = self.get_hash(key) % self.index_size
     node = {"key": key, "val": val, "next": None}

     head = index[loc]
     if head is None:
         index[loc] = node
         return 0

     if to_send is not None:
         LOG.info("verse {}, hash {}, loc {}, index {}".format(
             key, self.get_hash(key), loc, to_send))

     # Walk to the tail of the chain, counting the hops on the way.
     hops = 0
     tail = head
     while tail["next"] is not None:
         hops += 1
         tail = tail["next"]
     tail["next"] = node
     return hops
コード例 #12
0
    def read_dict_file(self, file_path, do_lower=False):
        """Read a two-column, tab-separated mapping file into a dict.

        Lines starting with ``#`` and blank lines are skipped. For every
        other line the first field becomes the key and the second the
        value, both stripped of surrounding whitespace.

        Args:
            file_path: Path of the mapping file.
            do_lower: When true, each line is lower-cased before it is
                split, so both keys and values end up lower-case.

        Returns:
            The mapping read from the file; an empty dict (plus a logged
            warning) when the file does not exist.

        Raises:
            IndexError: If a non-blank, non-comment line has no tab.
        """
        res = {}
        try:
            with open(file_path, "r") as mapping_list:
                for l in mapping_list:
                    if l.startswith('#'):
                        continue

                    if do_lower:
                        # str.lower() returns a new string; the result has
                        # to be rebound or the flag has no effect.
                        l = l.lower()

                    stripped = l.strip()
                    if not stripped:
                        # Blank lines (e.g. a trailing newline at the end of
                        # the file) carry no mapping.
                        continue

                    pair = stripped.split('\t')
                    res[pair[0].strip()] = pair[1].strip()
        except FileNotFoundError:
            LOG.warning(f"file {file_path} not found")
        return res
コード例 #13
0
ファイル: kafka.py プロジェクト: xrodneylee/kafka-agent
 def _exec_cmd(self, cmd):
     """Run ``kafka-topics.sh`` with the given argument string.

     The arguments are appended to the script path and the resulting
     string is executed through the shell (``shell=True``). A non-zero
     exit status is logged together with the captured stdout and stderr.

     Args:
         cmd: Argument string appended after the script path.

     Returns:
         Tuple ``(returncode, stdout)`` of the finished process.
     """
     full_cmd = '/root/kafka/bin/kafka-topics.sh {cmd}'.format(cmd=cmd)
     LOG.info('[Command] {cmd}'.format(cmd=full_cmd))

     proc = Popen(full_cmd, stdout=PIPE, stderr=PIPE, shell=True)
     out, err = proc.communicate()

     exit_code = proc.returncode
     if exit_code != 0:
         failure_report = (
             '[kafka-topics] failed',
             '[kafka-topics] {stdout}'.format(stdout=out),
             '[kafka-topics] {stderr}'.format(stderr=err),
         )
         for message in failure_report:
             LOG.error(message)
     return (exit_code, out)
コード例 #14
0
    def get_verse_alignment(self,
                            verse_nums,
                            edition_1,
                            edition_2,
                            alignments_loc=None,
                            index_loc=None):
        """Return the alignments of the requested verses for one edition pair.

        Identical editions yield an empty dict. If both editions are in
        ``self.bert_files`` the lookup is delegated to the parent class.
        Otherwise alignments and the index are taken from
        ``self.content_cache`` / ``self.indexes_cache``.

        Args:
            verse_nums: Iterable of verse identifiers to look up.
            edition_1: First edition of the pair.
            edition_2: Second edition of the pair.
            alignments_loc: Not used by this implementation.
            index_loc: Not used by this implementation.

        Returns:
            Dict mapping each verse found in the index to the value returned
            by ``self.create_ordered_alignment``. Empty when the editions
            are equal or no index exists for the pair.
        """
        aligns = {}

        if edition_1 == edition_2:
            return aligns

        if edition_1 in self.bert_files and edition_2 in self.bert_files:
            LOG.info("going to super aglingment for: {}, {}".format(
                edition_1, edition_2))
            return super().get_verse_alignment(verse_nums,
                                               self.lang_prf_map[edition_1],
                                               self.lang_prf_map[edition_2])

        LOG.info("getting eflomal aglingment for: {} , {}".format(
            edition_1, edition_2))
        s_lang, t_lang, s_edition, t_edition = self.get_ordered_editions(
            edition_1, edition_2)
        s_lang_file = self.edition_file_mapping[s_edition]
        t_lang_file = self.edition_file_mapping[t_edition]
        # The ordered source edition being the caller's second edition means
        # the stored direction is the reverse of the requested one.
        revert = s_edition == edition_2

        LOG.info("copying")
        alignments = self.content_cache.get(
            self.get_align_file_path(s_lang, t_lang))
        index_by_file = self.indexes_cache.get(
            self.get_index_file_path(s_lang, t_lang))

        # Resolve the per-file-pair verse index. Previously a missing entry
        # left the top-level (file-keyed) mapping in place, which was then
        # searched as if it were the verse index.
        index = None
        if (index_by_file is not None and s_lang_file in index_by_file
                and t_lang_file in index_by_file[s_lang_file]):
            index = index_by_file[s_lang_file][t_lang_file]

        if index is None:
            LOG.warning("couldn't find index for: {}, {}".format(
                s_edition, t_edition))
            return aligns

        LOG.info("getting verses")
        for verse in verse_nums:
            if verse in index:
                aligns[verse] = self.create_ordered_alignment(
                    alignments, index[verse], revert)
        LOG.info("verses got")
        return aligns
コード例 #15
0
ファイル: topic.py プロジェクト: xrodneylee/kafka-agent
 def post(self):
     """Handle a create-topic request.

     Parses the request arguments with the module-level ``parser``, logs
     each of them and forwards them to ``kafka_topics.create_topic``.

     Returns:
         Whatever ``kafka_topics.create_topic`` returns.
     """
     args = parser.parse_args()

     topic = args['topic']
     LOG.info('[request] topic={topic}'.format(topic=topic))

     replication = args['replication']
     LOG.info('[request] replication={replication}'.format(
         replication=replication))

     partitions = args['partitions']
     LOG.info('[request] partitions={partitions}'.format(
         partitions=partitions))

     return kafka_topics.create_topic(args['topic'], args['replication'],
                                      args['partitions'])
コード例 #16
0
    def post(self):
        """Handle a create-ACL request.

        Parses the request arguments with the module-level ``parser``, logs
        them and creates a producer or consumer ACL on ``kafka_acls``
        depending on ``role``.

        Returns:
            The result of the ACL call, or the string ``'invalid role'``
            when ``role`` is neither ``'producer'`` nor ``'consumer'``.
        """
        args = parser.parse_args()
        LOG.info(
            '[request] username={username}'.format(username=args['username']))
        LOG.info('[request] topic={topic}'.format(topic=args['topic']))
        LOG.info('[request] role={role}'.format(role=args['role']))

        role = args['role']
        if role == 'producer':
            return kafka_acls.create_producer_acl(args['username'],
                                                  args['topic'])
        if role == 'consumer':
            return kafka_acls.create_consumer_acl(args['username'],
                                                  args['topic'])
        return 'invalid role'
コード例 #17
0
ファイル: document_retrieval.py プロジェクト: cisnlp/parcoure
    def search_documents(self,
                         q,
                         verse=None,
                         all_docs=False,
                         doc_count=10,
                         prefixed_search=True,
                         language=None):
        """Query Elasticsearch for documents matching ``q``.

        Since Elasticsearch doesn't support more than 10000 hits per run,
        at most 10000 documents are retrieved; retrieval of all matched
        documents can be implemented later.

        Args:
            q: Query text. When it is blank, documents are matched on
                ``verse`` only.
            verse: Optional verse id. Used as a filter for a text query and
                as the match criterion when ``q`` is blank.
            all_docs: When ``True``, request the maximum of 10000 hits.
            doc_count: Number of hits requested otherwise (capped at 10000).
            prefixed_search: Selects the autocomplete URL when true, the
                normal URL otherwise.
            language: Optional language filter for a text query.

        Returns:
            The decoded JSON body of the Elasticsearch response.
        """
        if q.strip() != "":
            query = {
                "query": {
                    "bool": {
                        "must": {
                            "multi_match": {
                                "fields": ["content", "language"],
                                "query": q,
                                "type": "cross_fields"
                            }
                        }
                    }
                }
            }

            # Gather every requested filter first. Assigning them one after
            # the other to the same key made the language filter silently
            # replace the verse filter when both were given.
            filters = []
            if verse is not None:
                filters.append({"match": {"verse_id": verse}})
            if language is not None:
                filters.append({"match": {"language": language}})

            if len(filters) == 1:
                # Keep the single-filter request identical to before.
                query["query"]["bool"]["filter"] = filters[0]
            elif filters:
                query["query"]["bool"]["filter"] = filters
        else:
            query = {
                "query": {
                    "bool": {
                        "must": {
                            "match": {
                                "verse_id": verse
                            }
                        }
                    }
                }
            }

        query[
            "size"] = 10000 if all_docs == True or doc_count > 10000 else doc_count

        LOG.info(query)
        resp = requests.get(
            self.ealstic_search_autocomplete_url
            if prefixed_search else self.ealstic_search_normal_url,
            data=json.dumps(query),
            headers={'Content-Type': 'application/json'})
        return resp.json()
コード例 #18
0
ファイル: kafka.py プロジェクト: xrodneylee/kafka-agent
 def delete_topic(self, topic):
     """Delete ``topic`` through ``self._exec_cmd``.

     Args:
         topic: Name of the topic to delete.

     Returns:
         The stdout value returned by ``self._exec_cmd``.
     """
     import shlex  # local import keeps this block self-contained

     # The topic name is caller-supplied and ends up in one command string;
     # quoting keeps shell metacharacters in it from being interpreted.
     cmd = '--delete --zookeeper localhost:2181 --topic {topic}'.format(
         topic=shlex.quote(str(topic)))
     _, stdout = self._exec_cmd(cmd)
     LOG.info('[delete_topic] {stdout}'.format(stdout=stdout))
     return stdout
コード例 #19
0
ファイル: kafka.py プロジェクト: xrodneylee/kafka-agent
 def list_topic(self):
     """Run the ``--list`` command through ``self._exec_cmd``.

     Returns:
         The stdout value returned by ``self._exec_cmd``.
     """
     _, listing = self._exec_cmd('--list --zookeeper localhost:2181')
     LOG.info('[list_topic] {stdout}'.format(stdout=listing))
     return listing
コード例 #20
0
ファイル: kafka.py プロジェクト: xrodneylee/kafka-agent
 def get_user(self, username):
     """Describe the configuration of a single user.

     Args:
         username: Entity name of the user to describe.

     Returns:
         The stdout value returned by ``self._exec_cmd``.
     """
     import shlex  # local import keeps this block self-contained

     # The user name is caller-supplied and ends up in one command string;
     # quoting keeps shell metacharacters in it from being interpreted.
     cmd = '--zookeeper localhost:2181 --describe --entity-type users --entity-name {username}'.format(
         username=shlex.quote(str(username)))
     _, stdout = self._exec_cmd(cmd)
     LOG.info('[get_user] {stdout}'.format(stdout=stdout))
     return stdout
コード例 #21
0
ファイル: kafka.py プロジェクト: xrodneylee/kafka-agent
 def list_user(self):
     """Describe all entities of type ``users`` through ``self._exec_cmd``.

     Returns:
         The stdout value returned by ``self._exec_cmd``.
     """
     describe_all = '--zookeeper localhost:2181 --describe --entity-type users'
     _, description = self._exec_cmd(describe_all)
     LOG.info('[list_user] {stdout}'.format(stdout=description))
     return description