Example #1
def load_mode(args):
    core = get_plugin(args.device, args.l, args.config)
    log.info('IR for {} : {}'.format(args.device, args.model))
    log.info('Loading blob from {}'.format(args.load))
    net = get_net(model=args.model, core=core)
    net_layers, net_inputs, net_outputs = get_model_info(net)
    out_layers = get_layers_list(net_layers, net_inputs, net_outputs, args.layers)
    print_input_layers(net_inputs)
    print_output_layers(out_layers)
    layers_map = manage_user_outputs_with_mapping(mapping=args.mapping, reference_mapping=args.reference_mapping,
                                                  user_layers=out_layers)
    inputs = input_processing(args.model, net_inputs, args.input, layers_map)
    global_accuracy = []
    loaded = load_dump(args.load)
    for out_layer in layers_map:
        ref_out_layer = layers_map[out_layer]
        if out_layer == ref_out_layer:
            log.info('Layer {} statistics'.format(out_layer))
        else:
            log.info('Statistics \'{}\' vs \'{}\''.format(out_layer, ref_out_layer))
        net_copy = get_net_copy_with_output(model=args.model, output=out_layer, core=core)
        results = infer(net=net_copy, core=core, device=args.device, inputs=inputs, output=[out_layer])
        if out_layer not in results:
            continue
        out_blob, pc = results[out_layer]
        if ref_out_layer not in loaded:
            continue
        ref_out_blob = loaded[ref_out_layer]['blob']
        a_m = accuracy_metrics(out_blob=out_blob, ref_out_blob=ref_out_blob)
        if 'pc' in loaded[ref_out_layer]:
            ref_pc = loaded[ref_out_layer]['pc']
            performance_metrics(pc=pc, ref_pc=ref_pc)
        blob_counters(out_blob=out_blob, ref_out_blob=ref_out_blob)
        global_accuracy = update_global_accuracy_matrics(global_accuracy=global_accuracy, current_accuracy=a_m)
    print_all_over_the_net_metrics(global_accuracy=global_accuracy)
Example #2
def eff_dump_csv_upload(request):
    """ Allows to import the logs contained in an Eff formatted file.
    The file must be structured as one generated by L{EffCsvWriter<sitio.eff.utils.EffCsvWriter>}.
    If there are any external id's for the logs contained in the file being uploaded not found
    on the database, then a form that allows to associate these external id's with
    existing user profiles will be presented to the user
    (see L{eff_admin_users_association<eff_admin_users_association>}).

    @param request: the request object
    @type request: django.core.handlers.wsgi.WSGIRequest

    @return: The django.http.HttpResponse object for the view

    """

    context = {'title' : 'CSV Dump Upload'}

    if request.method == 'POST':
        form = DumpUploadForm(request.POST, request.FILES)
        if form.is_valid():
            n_rows, n_users, n_projects, n_project_assocs, temp_file = load_dump(request.FILES['csv_file'].file)
            if temp_file:
                request.session['log_entries_file'] = temp_file
                request.session['n_users'] = n_users
                return HttpResponseRedirect('/efi/administration/users_association/')
            context['notices'] = ['File Uploaded Successfully!']
        else:
            context['errors'] = ['Invalid Form']
    else:
        form = DumpUploadForm()

    context['form'] = form

    return render_to_response('admin_dump_csv_upload.html', context, context_instance=RequestContext(request))
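The view above pulls the upload out of request.FILES['csv_file'], so it assumes a DumpUploadForm exposing a single file field. The sketch below is a hypothetical stand-in for such a form; the real DumpUploadForm in the Eff project may add extra validation.

# Hypothetical minimal form for the view above; the real DumpUploadForm
# may also validate the file's format or size.
from django import forms

class DumpUploadForm(forms.Form):
    # The Eff-formatted CSV dump to import.
    csv_file = forms.FileField(label='CSV file')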
Example #3
def main():
    parser = ArgumentParser()
    parser.add_argument('--config', default='./configs/lgb_template.yaml')
    parser.add_argument('--create-features', action='store_true')
    options = parser.parse_args()
    with open(options.config) as f:
        config = yaml.safe_load(f)

    kfold = get_kfold(config)

    if options.create_features:
        train_path = get_dataset_filename(config, 'train')
        test_path = get_dataset_filename(config, 'test')

        with timer('Load train/test dump files'):
            train_df = load_dump(train_path)
            test_df = load_dump(test_path)

        create_features(config, train_df, test_df, kfold)

        del train_df, test_df
        gc.collect()

    target_col = config['target']
    target_path = get_dataset_filename(config, 'target')
    use_features = extract_use_features(config)
    x_train = load_train_dataset(use_features)
    y_train = load_dump(target_path)[target_col]

    output_dir = config['dataset']['output_directory']
    basename = Path(options.config).stem
    logger = setup_logger(output_dir, basename)

    clfs, importances = train_model(x_train, y_train, kfold, config, logger)

    save_feature_importances(importances, basename, output_dir)

    del x_train, y_train, importances
    gc.collect()

    pred = predict_model(clfs, use_features, config)

    print('Creating a submission csv file...')
    submission_path = get_dataset_filename(config, 'sample_submit')
    submission = pd.read_csv(submission_path)
    submission[target_col] = pred
    submission.to_csv(f'{output_dir}/submit_{basename}.csv.gz', index=False)
    print('Done.')
Example #4
def randomsample(args):
  """
  Produce a dataset by sampling.
  """

  if args.language:
    langs = args.language
  elif args.langs:
    langs = map(str.strip, open(args.langs))

  tempfile.tempdir = args.temp

  now = time.time()
  handle, path = tempfile.mkstemp(suffix='.tar', prefix='sample-{0}-'.format(args.number))
  os.close(handle)
  #path = os.path.join(args.temp, 'sample-{0}.tar'.format(args.number))
  build_index = not(args.skip_index)

  with tarfile.open(path, 'w') as tar:
    for lang in langs:
      try:
        dump = load_dump(lang, build_index=build_index, unpack=True)
      except KeyError:
        logger.error("do not have a dump for %s, skipping", lang)
        continue

      chosen = set() #keeps track of ids that have been chosen
      used = set() #keeps track of ids that have been examined
      
      # Adapted from wikicontent
      while len(chosen) < args.number:
        logger.debug("chose {0}/{1} so far".format(len(chosen), args.number))
        try:
          id = random.choice(xrange(dump.metadata['size']))
          if id in used:
            if len(used) >= dump.metadata['size']:
              # We have run out of documents to consider. Bail out.
              logger.warning("ran out of documents for %s", lang)
              break
            raise ReTry("already considered {0}".format(id))

          used.add(id)
          logger.debug("processing {0}".format(id))

          page = dump.get_page_by_index(id)

          if args.clean:
            # apply mediawiki removal
            text = remove_mediawiki_syntax(page.text)
            para = paragraphs(text)

            content = []
            for p in para:
              p = re.sub('\s',' ', p)
              if regexps.langref.match(p): continue
              if regexps.assoc.match(p): continue
              if regexps.syntax.search(p): continue
              p = regexps.tripquote.sub('\g<name>',p)
              p = regexps.doubquote.sub('\g<name>',p)
              u = p.decode('utf8').strip()
              if regexps.category_name.search(u): continue
              content.append(u)
            if not content: raise ReTry, "No usable content"

            document = '\n\n'.join(u.encode('utf8') for u in content) + '\n'
          else:
            # output raw mediawiki
            document = page.text

          if len(document) < args.minlen: 
            raise ReTry, "Too short ({0}<{1})".format(len(document), args.minlen)
          
          logger.debug("adding {0}".format(id))
          info = tarfile.TarInfo('{0}/{1}'.format(lang, id))
          info.size = len(document)
          info.mtime = now

          chosen.add(id)
          tar.addfile(info, StringIO(document))
          logger.debug("added {0}".format(id))

        except ReTry, e:
          logger.debug("Reject: %s", e.args[0])
          continue
      logger.info("chose %d documents for %s", len(chosen), lang)
Example #5
def randomsample(args):
    """
  Produce a dataset by sampling.
  """

    if args.language:
        langs = args.language
    elif args.langs:
        langs = map(str.strip, open(args.langs))

    tempfile.tempdir = args.temp

    now = time.time()
    path = os.path.join(config.get("paths", "scratch"), "sample-{0}.tar".format(args.number))
    build_index = not (args.skip_index)

    with tarfile.open(path, "w") as tar:
        for lang in langs:
            try:
                dump = load_dump(lang, build_index=build_index, unpack=True)
            except KeyError:
                logger.error("do not have a dump for %s, skipping", lang)
                continue

            chosen = set()  # keeps track of ids that have been chosen
            used = set()  # keeps track of ids that have been examined

            # Adapted from wikicontent
            while len(chosen) < args.number:
                logger.debug("chose {0}/{1} so far".format(len(chosen), args.number))
                try:
                    id = random.choice(xrange(dump.metadata["size"]))
                    if id in used:
                        if len(used) >= dump.metadata["size"]:
                            # We have run out of documents to consider. Bail out.
                            logger.warning("ran out of documents for %s", lang)
                            break
                        raise ReTry("already considered {0}".format(id))

                    used.add(id)
                    logger.debug("processing {0}".format(id))

                    page = dump.get_page_by_index(id)

                    if args.clean:
                        # apply mediawiki removal
                        text = remove_mediawiki_syntax(page.text)
                        para = paragraphs(text)

                        content = []
                        for p in para:
                            p = re.sub("\s", " ", p)
                            if regexps.langref.match(p):
                                continue
                            if regexps.assoc.match(p):
                                continue
                            if regexps.syntax.search(p):
                                continue
                            p = regexps.tripquote.sub("\g<name>", p)
                            p = regexps.doubquote.sub("\g<name>", p)
                            u = p.decode("utf8").strip()
                            if regexps.category_name.search(u):
                                continue
                            content.append(u)
                        if not content:
                            raise ReTry, "No usable content"

                        document = "\n\n".join(u.encode("utf8") for u in content) + "\n"
                    else:
                        # output raw mediawiki
                        document = page.text

                    if len(document) < args.minlen:
                        raise ReTry, "Too short ({0}<{1})".format(len(document), args.minlen)

                    logger.debug("adding {0}".format(id))
                    info = tarfile.TarInfo("{0}/{1}".format(lang, id))
                    info.size = len(document)
                    info.mtime = now

                    chosen.add(id)
                    tar.addfile(info, StringIO(document))
                    logger.debug("added {0}".format(id))

                except ReTry, e:
                    logger.debug("Reject: %s", e.args[0])
                    continue
            logger.info("chose %d documents for %s", len(chosen), lang)