Ejemplo n.º 1
0
 def run_sequential(self):
     """Generate segments from the job parameters and save them.

     Reads 'slang', 'tlang', 'plang' and 'domain' from
     ``self.job['params']``, asks the TM database to generate segments for
     the (slang, tlang) pair and hands the result to
     ``Task.save_segments``.
     """
     params = self.job['params']
     # init access to ES DB
     db = TMDbApi()
     lang_pair = (params['slang'], params['tlang'])
     generated = db.generate(lang_pair, params['plang'], params['domain'])
     Task.save_segments(generated)
Ejemplo n.º 2
0
 def query(self):
     """Query the TM database and return the leading results.

     Runs ``db.query`` for ``self.src_input`` on the
     (``self.src_lang``, ``self.tgt_lang``) pair, filtered by
     ``self.domain``, and collects ``(segment.to_dict(), match)`` tuples
     in the order the database yields them.

     Collection stops at the first empty segment or once the result index
     exceeds 100. The previous condition used ``or``, so the cap never
     applied to non-empty segments and an empty segment within the cap
     reached ``segment.to_dict()``.

     NOTE(review): the original comment said "10 best results" while the
     code bound is 100; the bound from the code is kept - confirm which
     one is intended.

     Returns:
         list of ``(dict, match)`` tuples.
     """
     db = TMDbApi()
     results = db.query(self.src_input,
                        (self.src_lang, self.tgt_lang),
                        filter={'domain': [self.domain]})
     l_best_segments = []
     for count, (segment, match) in enumerate(results):
         # Both conditions must hold to keep a result (was `or`).
         if not segment or count > 100:
             break
         l_best_segments.append((segment.to_dict(), match))
     return l_best_segments
Ejemplo n.º 3
0
    def __call__(self, index, segments_iter):
        """Yield generated segments for the pivot ids in *segments_iter*.

        For every pivot id, one ``(pivot_id, self.plang, lang)`` request is
        queued per language in ``self.langs``. Whenever the queue reaches
        ``self.BATCH_SIZE`` it is passed to ``db._generate_batch`` together
        with ``self.domains`` and the resulting segments are yielded. Any
        leftover requests are processed after the input is exhausted.

        *index* is accepted but not used here.
        """
        db = TMDbApi()

        pending = []
        for pivot_id in segments_iter:
            pending.extend((pivot_id, self.plang, lang) for lang in self.langs)
            if len(pending) < self.BATCH_SIZE:
                continue
            # Reached batch limit - generate segments
            yield from db._generate_batch(pending, self.domains)
            pending = []
        # Generate segments for remaining incomplete batch
        yield from db._generate_batch(pending, self.domains)
Ejemplo n.º 4
0
    def get_rdd_generate(self):
        """Build the RDD used for segment generation.

        Scans pivot entries via ``db.ml_index.scan_pivot`` for the job's
        pivot language ('plang') and ``self.get_langs()``, then passes the
        database handle and that scan to ``self._get_rdd``.
        """
        # init access to ES DB
        db = TMDbApi()
        params = self.job['params']
        # An earlier variant fed db.generate(langs, plang, domain) to
        # _get_rdd instead; the pivot scan below replaced it.
        pivot_scan = db.ml_index.scan_pivot(params['plang'], self.get_langs())
        return self._get_rdd(db, pivot_scan)
Ejemplo n.º 5
0
  parser.add_argument('-a', '--add', action="store_true", help="Add segments to the DB")
  parser.add_argument('-q', '--query', action="store_true", help="Query segments from the DB")
  parser.add_argument('-nq', '--num_query', type=int, help="Limit queries to this number", default=-1)
  parser.add_argument('-i', '--init', action="store_true", help="Init DB")
  parser.add_argument('-pt', '--pos_tag', action="store_true", help="Run POS tagger on segment texts")
  parser.add_argument('-sp', '--split_seg', action="store_true", help="Run Split rules on segment texts")

  parser.add_argument('-d', '--dir', type=str, help="Root directory for TMX file tree")
  parser.add_argument('-f', '--file', type=str, help="Single TMX file")
  parser.add_argument('-md', '--map_db', choices=['elasticsearch', 'mongodb', 'couchdb', 'redis', 'mysql', 'postgresql'],
                      help='Choose underlying driver for Map DB',
                      default='elasticsearch')
  return parser.parse_args()

if __name__ == "__main__":
    args = parse_args()
    # A single TMX file takes precedence over walking a directory tree
    it = [args.file] if args.file else TMXFileIterator(args.dir)

    # Open the TM database with the driver chosen on the command line
    db = TMDbApi(args.map_db)
    if args.init:
        db.init_db()
    # TODO: get from command line
    qlangs = ('en-GB', 'es-ES')
    #qlangs = ('en-GB', 'fr-FR')
    if args.add:
        add()
    if args.query:
        query(args.num_query)
Ejemplo n.º 6
0
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
from flask import render_template, Blueprint
from TMDbApi.TMDbApi import TMDbApi
from TMDbApi.TMDbQuery import TMDbQuery
from _datetime import datetime, timedelta

# Blueprint for the admin UI: templates are looked up in 'templates' and
# static files in 'assets' are served under '/admin/assets'.
admin_ui = Blueprint(
    'admin_ui',
    __name__,
    template_folder='templates',
    static_folder='assets',
    static_url_path='/admin/assets',
)
# Module-level TM database handle, created once when this module is imported.
tmdb = TMDbApi()


@admin_ui.route('/admin/')
@admin_ui.route('/admin/index.html')
def index():
    users_headers = [
        'username', 'role', 'is_active', 'scopes', 'created', 'password'
    ]

    base = datetime.today()
    month_dates = [(base - timedelta(days=x * 30)).strftime("%m/%y")
                   for x in range(0, 11)]
    usage_headers = ['username', 'total'] + month_dates
    tags_headers = ['id', 'name', 'type']
    export_headers = ['id', 'filename', 'size', 'export_time']
Ejemplo n.º 7
0
 def __init__(self, username):
     """Remember *username* and open a TM database handle.

     Args:
         username: stored unchanged as ``self.username``.
     """
     self.username = username
     self.db = TMDbApi()
Ejemplo n.º 8
0
class TMExport:
    """Export TM segments to zipped TMX files kept in per-user directories.

    Exports live under ``<export_path>/<username>/<export_id>/``, where
    ``export_path`` comes from the global configuration and falls back to
    the system temporary directory.
    """

    def __init__(self, username):
        """Open a TM database handle and remember the owning *username*."""
        self.db = TMDbApi()
        self.username = username

    def export(self,
               export_id,
               langs,
               filters=None,
               duplicates_only=False,
               limit=None):
        """Write the segments matching *filters* into a zipped TMX file.

        The archive is produced inside a dot-prefixed working directory,
        which is renamed to its final name once writing has finished.

        Args:
            export_id: name of the export directory.
            langs: sequence of language codes; they form the archive name
                and ``langs[0]`` is passed to the TMX writer.
            filters: optional filter dict. It is no longer modified; each
                file is scanned with a private copy carrying an extra
                'file_name' entry. ``None`` is treated as "no filters"
                (previously this raised ``TypeError``).
            duplicates_only: scan with ``db.get_duplicates`` instead of
                ``db.scan``.
            limit: optional maximum number of segments per file name.

        Returns:
            Path of the archive inside the final export directory. The
            path in the working directory, which used to be returned, no
            longer exists after the rename.
        """
        # Export path will have "." in the beginning to indicate work in progress
        work_path = self._get_export_path("." + export_id)
        final_path = self._get_export_path(export_id)
        os.makedirs(work_path, exist_ok=True)

        file_names = self.db.file_names(langs, filters)

        # Temporary zip file.
        zip_name = "_".join(langs).upper() + '.zip'
        tmpfile = os.path.join(work_path, zip_name)
        writer = TMXIterWriter(tmpfile, langs[0])

        scan_fun = self.db.get_duplicates if duplicates_only else self.db.scan
        # Work on copies so the caller's dict is never modified.
        base_filters = dict(filters) if filters else {}

        def segment_iter(file_filters):
            # Yield segments for one file, stopping after `limit` if set
            for i, s in enumerate(scan_fun(langs, file_filters), start=1):
                if limit and i > limit:
                    return
                yield s

        def write_iter():
            # Iterate file by file
            for fn in file_names:
                file_filters = dict(base_filters)
                file_filters['file_name'] = [fn]
                # TODO: in addition, write data to a local file to import it
                # at the end of generation
                yield from writer.write_iter(segment_iter(file_filters), fn)
            # Zip footer
            yield from writer.write_close()

        # Generate zipped TMX file(s); the handle is closed (and flushed)
        # before the directory is renamed.
        with open(tmpfile, "wb") as of:
            for d in write_iter():
                of.write(d)
        # When is done, finalize by renaming export path
        os.rename(work_path, final_path)

        return os.path.join(final_path, zip_name)

    def list(self, export_id='*'):
        """Describe the zip archives of one export, or of all by default.

        Args:
            export_id: export directory name; may be a glob pattern.

        Returns:
            list of dicts with keys 'filename', 'filepath' (containing
            directory), 'id' (directory base name), 'export_time'
            (datetime built from the file's mtime) and 'size' (bytes).
        """
        export_pattern = os.path.join(self._get_export_path(export_id),
                                      '*.zip')
        flist = []
        for f in glob.glob(export_pattern):
            dir_path, file_name = os.path.split(f)
            flist.append({
                "filename": file_name,
                "filepath": dir_path,
                "id": os.path.basename(dir_path),
                "export_time": datetime.datetime.fromtimestamp(
                    os.path.getmtime(f)),
                "size": os.path.getsize(f),
            })
        return flist

    def delete(self, export_id):
        """Remove the directory of *export_id*; a missing one is ignored."""
        try:
            shutil.rmtree(self._get_export_path(export_id))
        except FileNotFoundError:
            pass

    def _get_export_path(self, export_id):
        """Return ``<export_path>/<username>/<export_id>``."""
        # Setup export path
        base_dir = G_CONFIG.config.get('export_path', tempfile.gettempdir())
        return os.path.join(base_dir, self.username, export_id)
Ejemplo n.º 9
0
 def delete_segments(task, langs, filter, duplicates_only):
     """Log the size of the matching scan, then delete the segments.

     The logged count is computed for ``task.get_langs()`` with *filter*;
     the deletion itself is issued for *langs* with *filter* and
     *duplicates_only*.
     """
     db = TMDbApi()
     scan_size = db.count_scan(task.get_langs(), filter)
     message = "Delete scan size: {}".format(scan_size)
     logging.info(message)
     db.delete(langs, filter, duplicates_only)
Ejemplo n.º 10
0
 def save_segments(seg_iter):
     """Add every segment from *seg_iter* to the TM database."""
     db = TMDbApi()
     db.add_segments(seg_iter)
Ejemplo n.º 11
0
 def get_rdd(self):
     """Build the RDD over segments selected by the job's filter.

     Scans the database for ``self.get_langs()`` with
     ``self.job['params']['filter']`` and passes the database handle and
     the scan to ``self._get_rdd``.
     """
     # init access to ES DB
     db = TMDbApi()
     segment_scan = db.scan(self.get_langs(), self.job['params']['filter'])
     return self._get_rdd(db, segment_scan)
Ejemplo n.º 12
0
 def maintain_segments(task, langs, filter):
     """Run *task* over the scanned segments and store what it returns.

     *task* is called as ``task(0, scan)`` where ``scan`` is the database
     scan for *langs* and *filter*; its result is passed to
     ``db.add_segments``.
     """
     db = TMDbApi()
     scanned = db.scan(langs, filter)
     processed = task(0, scanned)
     db.add_segments(processed)