Esempio n. 1
0
 def create_consumer_acl(self, username, topic):
     """Add a consumer ACL for ``topic`` and return the command's stdout.

     The option string is built here and handed to ``self._exec_cmd``.
     The principal and the topic are shell-quoted first, so that any shell
     metacharacters they contain are passed on as literal argument text.

     Args:
         username: Name substituted into the principal template.
         topic: Topic the ACL is granted on.

     Returns:
         The second element of the pair returned by ``self._exec_cmd``.
     """
     import shlex  # local import: the file's import block is not visible here

     # NOTE(review): the template below has no ``{username}`` placeholder, so
     # ``username`` currently has no effect on the principal - confirm.
     user = '******'.format(username=username)
     cmd = ('--authorizer-properties zookeeper.connect=localhost:2181 --add '
            '--allow-principal {user} --consumer --group=* --topic {topic}'
            ).format(user=shlex.quote(user), topic=shlex.quote(str(topic)))
     _, stdout = self._exec_cmd(cmd)
     LOG.info('[create_consumer_acl] {stdout}'.format(stdout=stdout))
     return stdout
Esempio n. 2
0
 def create_user(self, username, password):
     """Register SCRAM credentials for ``username`` and return stdout.

     The same password is used for the SCRAM-SHA-256 and SCRAM-SHA-512
     entries. The whole ``--add-config`` value and the user name are
     shell-quoted, so quotes or other metacharacters in the inputs stay
     inside their argument.

     Args:
         username: Entity name the credentials are attached to.
         password: Password placed in both SCRAM entries.

     Returns:
         The second element of the pair returned by ``self._exec_cmd``.
     """
     import shlex  # local import: the file's import block is not visible here

     scram_config = ('SCRAM-SHA-256=[password={password}],'
                     'SCRAM-SHA-512=[password={password}]'
                     ).format(password=password)
     # For ordinary passwords shlex.quote yields the same single-quoted text
     # that was previously written by hand; it also escapes embedded quotes.
     cmd = ('--zookeeper localhost:2181 --alter --add-config {pwd} '
            '--entity-type users --entity-name {username}'
            ).format(pwd=shlex.quote(scram_config),
                     username=shlex.quote(str(username)))
     _, stdout = self._exec_cmd(cmd)
     LOG.info('[create_user] {stdout}'.format(stdout=stdout))
     return stdout
Esempio n. 3
0
 def create_topic(self, topic_name, replication_factor, partitions):
     """Create a topic and return the command's stdout.

     Every interpolated value is converted to text and shell-quoted before
     the option string is handed to ``self._exec_cmd``.

     Args:
         topic_name: Name of the topic to create.
         replication_factor: Value for ``--replication-factor``.
         partitions: Value for ``--partitions``.

     Returns:
         The second element of the pair returned by ``self._exec_cmd``.
     """
     import shlex  # local import: the file's import block is not visible here

     cmd = ('--create --zookeeper localhost:2181 '
            '--replication-factor {replication_factor} '
            '--partitions {partitions} --topic {topic_name}').format(
                topic_name=shlex.quote(str(topic_name)),
                replication_factor=shlex.quote(str(replication_factor)),
                partitions=shlex.quote(str(partitions)))
     _, stdout = self._exec_cmd(cmd)
     LOG.info('[create_topic] {stdout}'.format(stdout=stdout))
     return stdout
Esempio n. 4
0
    def get_verse_alignment_mp(self, verse_nums, edition_pairs):
        """Collect verse alignments for several edition pairs in one call.

        For each pair the result is one of:
          * an empty dict when both editions resolve to the same language;
          * the parent class' ``get_verse_alignment`` result when both
            editions are listed in ``self.bert_files``;
          * otherwise a dict built from the alignment and index data
            obtained through ``self.get_alignment`` / ``self.get_index``.

        Args:
            verse_nums: Iterable of verse identifiers to look up.
            edition_pairs: Iterable of ``(edition_1, edition_2)`` tuples.

        Returns:
            A list of ``(edition_1, edition_2, alignments_dict)`` tuples, in
            the order of ``edition_pairs``.
        """
        results = []
        # Alignment and index data are reloaded only when the ordered language
        # pair differs from the one used by the previous pair.
        loaded_s_lang, loaded_t_lang = None, None
        lang_index, alignments = None, None

        for edition_1, edition_2 in edition_pairs:
            pair_aligns = {}

            lang_1 = self.get_lang_from_edition(edition_1)
            lang_2 = self.get_lang_from_edition(edition_2)
            if lang_1 == lang_2:
                results.append((edition_1, edition_2, pair_aligns))
                continue

            if edition_1 in self.bert_files and edition_2 in self.bert_files:
                LOG.info("going to super aglingment for: {}, {}".format(
                    edition_1, edition_2))
                parent_aligns = super().get_verse_alignment(
                    verse_nums, self.lang_prf_map[edition_1],
                    self.lang_prf_map[edition_2])
                results.append((edition_1, edition_2, parent_aligns))
                continue

            LOG.info("getting eflomal aglingment for: {} , {}".format(
                edition_1, edition_2))
            ordered = self.get_ordered_editions(edition_1, edition_2)
            s_lang, t_lang, s_edition, t_edition = ordered
            s_lang_file = self.edition_file_mapping[s_edition]
            t_lang_file = self.edition_file_mapping[t_edition]

            # The stored direction is reversed when the caller's second
            # edition ended up as the source side.
            revert = bool(s_edition == edition_2)

            if s_lang != loaded_s_lang or t_lang != loaded_t_lang:
                alignments = self.get_alignment(s_lang, t_lang)
                lang_index = self.get_index(s_lang, t_lang)
                loaded_s_lang, loaded_t_lang = s_lang, t_lang

            pair_index = None
            if (s_lang_file in lang_index
                    and t_lang_file in lang_index[s_lang_file]):
                pair_index = lang_index[s_lang_file][t_lang_file]

            if pair_index is None:
                LOG.warning("couldn't find index for: " + s_edition + ", " +
                            t_edition)
            else:
                LOG.info(
                    "getting verse, {}, {}, {}, {}, {}, {}, {}, {}".format(
                        edition_1, edition_2, s_lang, t_lang, loaded_s_lang,
                        loaded_t_lang, len(lang_index), len(pair_index)))
                for verse in verse_nums:
                    if verse in pair_index:
                        pair_aligns[verse] = self.create_ordered_alignment(
                            alignments, pair_index[verse], revert)
                LOG.info("verses got")

            results.append((edition_1, edition_2, pair_aligns))

        return results
Esempio n. 5
0
 def read_langs_order_file(self):
     """Read ``self.lang_order_file_path`` into a list of stripped lines.

     Returns:
         One entry per line of the file, with surrounding whitespace
         removed (blank lines become empty strings). If the file does not
         exist a warning is logged and an empty list is returned.
     """
     try:
         with open(self.lang_order_file_path, 'r') as order_file:
             return [entry.strip() for entry in order_file]
     except FileNotFoundError:
         LOG.warning("Langs order file not found")
         return []
Esempio n. 6
0
 def post(self):
     """Handle a create-user request.

     Parses the request arguments, logs the user name, and forwards the
     user name and password to ``kafka_configs.create_user``.

     Returns:
         Whatever ``kafka_configs.create_user`` returns.
     """
     args = parser.parse_args()
     LOG.info(
         '[request] username={username}'.format(username=args['username']))
     # Credentials must never reach the log; record a fixed marker instead
     # of the password value.
     LOG.info(
         '[request] password={password}'.format(password='<redacted>'))
     response = kafka_configs.create_user(args['username'],
                                          args['password'])
     return response
Esempio n. 7
0
 def create_index_binary_file_if_not_exists(self, lang1, lang2):
     """Pickle the index for a language pair unless the file already exists.

     The existence check and the write happen while ``index_lock`` is held.
     The lock is released in a ``finally`` clause, so a failure while
     reading or pickling cannot leave it held.

     Args:
         lang1: First language of the pair.
         lang2: Second language of the pair.
     """
     index_lock.acquire()
     try:
         binary_path = self.get_index_binary_file_path(lang1, lang2)
         if not os.path.exists(binary_path):
             LOG.info("creating binary index file for {}, {}".format(
                 lang1, lang2))
             ind = self.read_index_file(
                 self.get_index_file_path(lang1, lang2))
             with open(binary_path, 'wb') as of:
                 pickle.dump(ind, of)
     finally:
         index_lock.release()
Esempio n. 8
0
 def create_align_binary_file_if_not_exists(self, lang1, lang2):
     """Pickle the alignments for a language pair unless the file exists.

     The existence check and the write happen while ``alignments_lock`` is
     held. The lock is released in a ``finally`` clause, so a failure while
     reading or pickling cannot leave it held.

     Args:
         lang1: First language of the pair.
         lang2: Second language of the pair.
     """
     alignments_lock.acquire()
     try:
         binary_path = self.get_align_binary_file_path(lang1, lang2)
         if not os.path.exists(binary_path):
             LOG.info("creating binary alignments file for {}, {}".format(
                 lang1, lang2))
             aln = self.read_alignment_file(
                 self.get_align_file_path(lang1, lang2))
             with open(binary_path, 'wb') as of:
                 pickle.dump(aln, of)
     finally:
         alignments_lock.release()
Esempio n. 9
0
 def read_alignment_file(self, file_path):
     """Read an alignment file into a list with one entry per line.

     Lines are split on tab characters. When a line has more than one
     field, the first field is treated as a leading index and the second
     field is kept; otherwise the whole line is kept. Nothing is stripped,
     so entries keep whatever trailing newline the kept field had.

     Args:
         file_path: Path of the alignment file.

     Returns:
         List of kept fields, in file order.
     """
     LOG.info("reading alignment file ({})".format(file_path))
     entries = []
     with open(file_path, 'r') as handle:
         for row in handle:
             fields = row.split('\t')
             # Skip the index column at the beginning of the line, if any.
             entries.append(fields[1] if len(fields) > 1 else fields[0])
     return entries
Esempio n. 10
0
    def read_index_file(self, file_path):
        """Build the nested verse index from a tab-separated index file.

        Each line must hold exactly three tab-separated fields: verse,
        source file and target file. The zero-based line number is stored
        at ``result[source_file][target_file][verse]``.

        Args:
            file_path: Path of the index file.

        Returns:
            Nested dict ``{source_file: {target_file: {verse: line_no}}}``.

        Raises:
            ValueError: If a line does not split into exactly three fields.
        """
        LOG.info("reading index file ({})".format(file_path))
        index = {}
        with open(file_path, 'r') as handle:
            for line_no, row in enumerate(handle):
                verse, s_file, t_file = row.strip().split('\t')

                self.setup_dict_entry(index, s_file, {})
                by_target = index[s_file]

                self.setup_dict_entry(by_target, t_file, {})
                by_target[t_file][verse] = line_no

        return index
Esempio n. 11
0
 def add_to_index(self, index, key, val, to_send):
     """Insert ``key``/``val`` into a chained hash table held in ``index``.

     The slot is ``self.get_hash(key) % self.index_size``. An empty slot
     receives the new node directly; otherwise the node is appended to the
     end of the slot's linked chain. Existing keys are not looked up, so
     inserting the same key twice adds a second node.

     Args:
         index: Indexable table whose slots are ``None`` or node dicts of
             the form ``{"key", "val", "next"}``.
         key: Key to insert.
         val: Value stored with the key.
         to_send: When not ``None``, a collision is logged together with
             this value.

     Returns:
         Number of ``next`` links followed before appending (0 when the
         slot was empty or held a single node).
     """
     key_hash = self.get_hash(key)  # computed once, reused for the log line
     loc = key_hash % self.index_size
     node = {"key": key, "val": val, "next": None}

     if index[loc] is None:
         index[loc] = node
         return 0

     if to_send is not None:
         LOG.info("verse {}, hash {}, loc {}, index {}".format(
             key, key_hash, loc, to_send))

     next_count = 0
     last = index[loc]
     while last["next"] is not None:
         next_count += 1
         last = last["next"]
     last["next"] = node
     return next_count
Esempio n. 12
0
    def read_dict_file(self, file_path, do_lower=False):
        """Read a tab-separated key/value file into a dict.

        Lines starting with ``#`` and blank lines are skipped. The first
        two tab-separated fields of every other line become key and value,
        each stripped of surrounding whitespace; further fields are ignored.

        Args:
            file_path: Path of the mapping file.
            do_lower: When true, each line is lower-cased before it is
                split, so both keys and values are lower-case.

        Returns:
            Dict of the pairs read; empty if the file does not exist (a
            warning is logged in that case).

        Raises:
            IndexError: If a non-blank, non-comment line has no tab.
        """
        res = {}
        try:
            with open(file_path, "r") as mapping_list:
                for line in mapping_list:
                    if line.startswith('#') or not line.strip():
                        continue

                    if do_lower:
                        # str.lower() returns a new string; the result has
                        # to be kept for the option to have any effect.
                        line = line.lower()

                    pair = line.strip().split('\t')

                    res[pair[0].strip()] = pair[1].strip()
        except FileNotFoundError:
            LOG.warning(f"file {file_path} not found")
        return res
Esempio n. 13
0
 def _exec_cmd(self, cmd):
     """Run ``kafka-topics.sh`` with the given option string.

     The full command line is executed through the shell (``shell=True``),
     so ``cmd`` must already be safely quoted by the caller.

     Args:
         cmd: Option string appended after the script path.

     Returns:
         Tuple ``(returncode, stdout)``; ``stdout`` is whatever
         ``Popen.communicate`` captured. A non-zero return code is logged
         as an error together with the captured stdout and stderr.
     """
     command_line = '/root/kafka/bin/kafka-topics.sh {cmd}'.format(cmd=cmd)
     LOG.info('[Command] {cmd}'.format(cmd=command_line))

     proc = Popen(command_line, stdout=PIPE, stderr=PIPE, shell=True)
     out, err = proc.communicate()
     exit_code = proc.returncode

     if exit_code != 0:
         failure_lines = (
             '[kafka-topics] failed',
             '[kafka-topics] {stdout}'.format(stdout=out),
             '[kafka-topics] {stderr}'.format(stderr=err),
         )
         for message in failure_lines:
             LOG.error(message)
     return (exit_code, out)
Esempio n. 14
0
    def get_verse_alignment(self,
                            verse_nums,
                            edition_1,
                            edition_2,
                            alignments_loc=None,
                            index_loc=None):
        """Return the alignments of the requested verses for two editions.

        Identical editions yield an empty dict. When both editions are in
        ``self.bert_files`` the call is delegated to the parent class.
        Otherwise alignments and the index are fetched from
        ``self.content_cache`` / ``self.indexes_cache``. If no index exists
        for the two edition files, a warning is logged and an empty dict is
        returned; the top-level index is never searched for verse ids.

        Args:
            verse_nums: Iterable of verse identifiers to look up.
            edition_1: First edition.
            edition_2: Second edition.
            alignments_loc: Not used by this implementation.
            index_loc: Not used by this implementation.

        Returns:
            Dict mapping each verse found in the index to the value
            returned by ``self.create_ordered_alignment``.
        """
        aligns = {}

        if edition_1 == edition_2:
            return aligns

        if edition_1 in self.bert_files and edition_2 in self.bert_files:
            LOG.info("going to super aglingment for: {}, {}".format(
                edition_1, edition_2))
            return super().get_verse_alignment(verse_nums,
                                               self.lang_prf_map[edition_1],
                                               self.lang_prf_map[edition_2])

        LOG.info("getting eflomal aglingment for: {} , {}".format(
            edition_1, edition_2))
        s_lang, t_lang, s_edition, t_edition = self.get_ordered_editions(
            edition_1, edition_2)
        s_lang_file = self.edition_file_mapping[s_edition]
        t_lang_file = self.edition_file_mapping[t_edition]
        # The stored direction is reversed when the caller's second edition
        # ended up as the source side.
        revert = s_edition == edition_2

        LOG.info("copying")
        alignments = self.content_cache.get(
            self.get_align_file_path(s_lang, t_lang))
        lang_index = self.indexes_cache.get(
            self.get_index_file_path(s_lang, t_lang))

        # Only a per-file-pair entry is a valid verse index. The language
        # level dict is keyed by file name and must not be searched for
        # verse ids.
        index = None
        if lang_index is not None and s_lang_file in lang_index:
            if t_lang_file in lang_index[s_lang_file]:
                index = lang_index[s_lang_file][t_lang_file]

        if index is None:
            LOG.warning("couldn't find index for: " + s_edition + ", " +
                        t_edition)
            return aligns

        LOG.info("getting verses")
        for verse in verse_nums:
            if verse in index:
                aligns[verse] = self.create_ordered_alignment(
                    alignments, index[verse], revert)
        LOG.info("verses got")
        return aligns
Esempio n. 15
0
 def post(self):
     """Handle a create-topic request.

     Parses the request arguments, logs topic, replication and partitions,
     and forwards them to ``kafka_topics.create_topic``.

     Returns:
         Whatever ``kafka_topics.create_topic`` returns.
     """
     args = parser.parse_args()
     topic = args['topic']
     replication = args['replication']
     partitions = args['partitions']

     LOG.info('[request] topic={topic}'.format(topic=topic))
     LOG.info('[request] replication={replication}'.format(
         replication=replication))
     LOG.info('[request] partitions={partitions}'.format(
         partitions=partitions))

     return kafka_topics.create_topic(topic, replication, partitions)
Esempio n. 16
0
    def post(self):
        """Handle an ACL request for a producer or consumer role.

        Parses the request arguments, logs user name, topic and role, and
        calls the matching ``kafka_acls`` method.

        Returns:
            The result of the ``kafka_acls`` call, or the string
            ``'invalid role'`` when the role is neither ``'producer'`` nor
            ``'consumer'``.
        """
        args = parser.parse_args()
        username, topic, role = args['username'], args['topic'], args['role']

        LOG.info('[request] username={username}'.format(username=username))
        LOG.info('[request] topic={topic}'.format(topic=topic))
        LOG.info('[request] role={role}'.format(role=role))

        if role == 'producer':
            return kafka_acls.create_producer_acl(username, topic)
        if role == 'consumer':
            return kafka_acls.create_consumer_acl(username, topic)
        return 'invalid role'
Esempio n. 17
0
    def search_documents(self,
                         q,
                         verse=None,
                         all_docs=False,
                         doc_count=10,
                         prefixed_search=True,
                         language=None):
        """
        Run a search request and return the decoded JSON response.

        A non-blank ``q`` is sent as a ``cross_fields`` multi-match over the
        ``content`` and ``language`` fields, optionally filtered by
        ``verse`` and/or ``language``; when both are given, both filters
        are applied. A blank ``q`` becomes a plain match on ``verse_id``
        (``language`` is ignored in that case).

        Since Elasticsearch does not support more than 10000 hits per
        request, the requested size is capped at 10000; retrieval of all
        matched documents is not implemented.

        Args:
            q: Query text.
            verse: Verse id to filter on (or to match when ``q`` is blank).
            all_docs: When equal to ``True``, request the maximum of 10000
                hits regardless of ``doc_count``.
            doc_count: Number of hits to request, capped at 10000.
            prefixed_search: Selects the autocomplete URL when true, the
                normal URL otherwise.
            language: Language to filter on (only used with non-blank ``q``).

        Returns:
            The JSON-decoded body of the HTTP response.
        """
        max_hits = 10000

        if q.strip() != "":
            bool_clause = {
                "must": {
                    "multi_match": {
                        "fields": ["content", "language"],
                        "query": q,
                        "type": "cross_fields"
                    }
                }
            }

            filters = []
            if verse is not None:
                filters.append({"match": {"verse_id": verse}})
            if language is not None:
                filters.append({"match": {"language": language}})

            # A single filter keeps the original object form; several
            # filters are sent as a list so that none overwrites another.
            if len(filters) == 1:
                bool_clause["filter"] = filters[0]
            elif filters:
                bool_clause["filter"] = filters
        else:
            bool_clause = {"must": {"match": {"verse_id": verse}}}

        query = {"query": {"bool": bool_clause}}

        # ``== True`` is kept on purpose: truthy non-boolean values (for
        # example a non-empty string) must not switch on "all documents".
        if all_docs == True or doc_count > max_hits:
            query["size"] = max_hits
        else:
            query["size"] = doc_count

        LOG.info(query)
        url = (self.ealstic_search_autocomplete_url
               if prefixed_search else self.ealstic_search_normal_url)
        resp = requests.get(url,
                            data=json.dumps(query),
                            headers={'Content-Type': 'application/json'})
        return resp.json()
Esempio n. 18
0
 def delete_topic(self, topic):
     """Delete a topic and return the command's stdout.

     The topic name is shell-quoted before the option string is handed to
     ``self._exec_cmd``.

     Args:
         topic: Name of the topic to delete.

     Returns:
         The second element of the pair returned by ``self._exec_cmd``.
     """
     import shlex  # local import: the file's import block is not visible here

     cmd = '--delete --zookeeper localhost:2181 --topic {topic}'.format(
         topic=shlex.quote(str(topic)))
     _, stdout = self._exec_cmd(cmd)
     LOG.info('[delete_topic] {stdout}'.format(stdout=stdout))
     return stdout
Esempio n. 19
0
 def list_topic(self):
     """List topics by passing ``--list`` to ``self._exec_cmd``.

     Returns:
         The second element of the pair returned by ``self._exec_cmd``.
     """
     _returncode, output = self._exec_cmd('--list --zookeeper localhost:2181')
     LOG.info('[list_topic] {stdout}'.format(stdout=output))
     return output
Esempio n. 20
0
 def get_user(self, username):
     """Describe one user entity and return the command's stdout.

     The user name is shell-quoted before the option string is handed to
     ``self._exec_cmd``.

     Args:
         username: Entity name to describe.

     Returns:
         The second element of the pair returned by ``self._exec_cmd``.
     """
     import shlex  # local import: the file's import block is not visible here

     cmd = ('--zookeeper localhost:2181 --describe --entity-type users '
            '--entity-name {username}').format(
                username=shlex.quote(str(username)))
     _, stdout = self._exec_cmd(cmd)
     LOG.info('[get_user] {stdout}'.format(stdout=stdout))
     return stdout
Esempio n. 21
0
 def list_user(self):
     """Describe all user entities via ``self._exec_cmd``.

     Returns:
         The second element of the pair returned by ``self._exec_cmd``.
     """
     options = '--zookeeper localhost:2181 --describe --entity-type users'
     _returncode, output = self._exec_cmd(options)
     LOG.info('[list_user] {stdout}'.format(stdout=output))
     return output