Example #1
0
def main():
    """Rate every input pfd file with the requested raters and report results.

    Reads its configuration from the module-level ``args`` object:

    * ``args.raters`` -- names of rater modules (attributes of ``raters``)
      to instantiate; if empty, the list of raters is printed instead.
    * ``args.list_raters`` -- print the available raters and exit with
      status 0.
    * ``args.ignore_warnings`` / ``args.redirect_warnings`` -- configure
      how ``utils.RatingWarning`` warnings are handled.
    * ``args.num_procs`` -- when greater than 1, files are rated in a
      ``multiprocessing.Pool``; otherwise they are rated serially.
    * ``args.infiles`` -- the pfd file names passed to ``rate_pfd``.
    * ``args.write_to_file`` / ``args.write_to_screen`` -- output options.

    In both the serial and the parallel mode the candidates are reported in
    the order of ``args.infiles``.
    """
    if not args.raters:
        print("No raters are loaded.")
        args.list_raters = True

    if args.list_raters:
        utils.print_raters_list(args.verbosity)
        sys.exit(0)

    if args.ignore_warnings:
        warnings.simplefilter('ignore', utils.RatingWarning)

    if args.redirect_warnings:
        warnings.showwarning = warn_to_stdout

    rater_instances = [getattr(raters, rater_name).Rater()
                       for rater_name in args.raters]

    cands = []
    if args.num_procs > 1:
        print("Using %d rater threads" % args.num_procs)
        # Waiting with a timeout (instead of spinning on ready(), or blocking
        # forever) keeps the CPU idle while still letting the main process
        # react to signals such as Ctrl-C between waits.
        poll_seconds = 1.0
        failed = []
        rater_pool = multiprocessing.Pool(processes=args.num_procs)
        try:
            pending = [rater_pool.apply_async(rate_pfd,
                                              (pfdfn, rater_instances))
                       for pfdfn in args.infiles]
            # No further tasks will be submitted.
            rater_pool.close()
            for result in pending:
                while not result.ready():
                    result.wait(poll_seconds)
                if result.successful():
                    cands.append(result.get())
                else:
                    failed.append(result)
        finally:
            # Always release the worker processes, even if collecting the
            # results was interrupted.
            rater_pool.terminate()
            rater_pool.join()

        print("Number of failures detected: %d" % len(failed))
        for fail in failed:
            try:
                print("%s %s" % (type(fail), fail))
                # get() re-raises the exception from the worker so that its
                # traceback can be shown.
                fail.get()
            except Exception:
                traceback.print_exc()
    else:
        for pfdfn in args.infiles:
            cand = rate_pfd(pfdfn, rater_instances)
            cands.append(cand)
            cand.clear_cache()

    for cand in cands:
        if args.write_to_file:
            cand.write_ratings_to_file()
        if args.write_to_screen:
            print(cand.pfdfn)
            print(cand.get_ratings_overview())
            print('-' * 25)
Example #2
0
def main():
    """Rate every input pfd file with the requested raters and report results.

    Reads its configuration from the module-level ``args`` object:

    * ``args.raters`` -- names of rater modules (attributes of ``raters``)
      to instantiate; if empty, the list of raters is printed instead.
    * ``args.list_raters`` -- print the available raters and exit with
      status 0.
    * ``args.ignore_warnings`` / ``args.redirect_warnings`` -- configure
      how ``utils.RatingWarning`` warnings are handled.
    * ``args.num_procs`` -- when greater than 1, files are rated in a
      ``multiprocessing.Pool``; otherwise they are rated serially.
    * ``args.infiles`` -- the pfd file names passed to ``rate_pfd``.
    * ``args.write_to_file`` / ``args.write_to_screen`` -- output options.

    In both the serial and the parallel mode the candidates are reported in
    the order of ``args.infiles``.
    """
    if not args.raters:
        print("No raters are loaded.")
        args.list_raters = True

    if args.list_raters:
        utils.print_raters_list(args.verbosity)
        sys.exit(0)

    if args.ignore_warnings:
        warnings.simplefilter('ignore', utils.RatingWarning)

    if args.redirect_warnings:
        warnings.showwarning = warn_to_stdout

    rater_instances = [getattr(raters, rater_name).Rater()
                       for rater_name in args.raters]

    cands = []
    if args.num_procs > 1:
        print("Using %d rater threads" % args.num_procs)
        # Waiting with a timeout (instead of spinning on ready(), or blocking
        # forever) keeps the CPU idle while still letting the main process
        # react to signals such as Ctrl-C between waits.
        poll_seconds = 1.0
        failed = []
        rater_pool = multiprocessing.Pool(processes=args.num_procs)
        try:
            pending = [rater_pool.apply_async(rate_pfd,
                                              (pfdfn, rater_instances))
                       for pfdfn in args.infiles]
            # No further tasks will be submitted.
            rater_pool.close()
            for result in pending:
                while not result.ready():
                    result.wait(poll_seconds)
                if result.successful():
                    cands.append(result.get())
                else:
                    failed.append(result)
        finally:
            # Always release the worker processes, even if collecting the
            # results was interrupted.
            rater_pool.terminate()
            rater_pool.join()

        print("Number of failures detected: %d" % len(failed))
        for fail in failed:
            try:
                print("%s %s" % (type(fail), fail))
                # get() re-raises the exception from the worker so that its
                # traceback can be shown.
                fail.get()
            except Exception:
                traceback.print_exc()
    else:
        for pfdfn in args.infiles:
            cand = rate_pfd(pfdfn, rater_instances)
            cands.append(cand)
            cand.clear_cache()

    for cand in cands:
        if args.write_to_file:
            cand.write_ratings_to_file()
        if args.write_to_screen:
            print(cand.pfdfn)
            print(cand.get_ratings_overview())
            print('-' * 25)
Example #3
0
def main():
    """Compute the ratings missing from the database and upload them.

    For every loaded rater, the header IDs returned by
    ``get_beams_to_rate`` are processed one at a time: the candidates of
    that header which lack a row in ``pdm_rating`` for the current rating
    instances are looked up, their pfd files are fetched into a temporary
    directory, rated with ``rate_pfd`` and the resulting values are
    inserted into ``pdm_rating``.  The temporary directory is removed and
    the database connection closed even if an error occurs.

    Configuration comes from the module-level ``args`` object
    (``num_procs``, ``raters``, ``list_raters``, ``verbosity``) and the
    module-level ``DBNAME``.

    Raises:
        utils.RatingError: if a header ID selected for rating turns out to
            have no missing ratings.
    """
    # Imported locally so this function does not depend on a module-level
    # import of the standard ``warnings`` module.
    import warnings

    if args.num_procs > 1:
        warnings.warn("Multithreading not implemnted (%d threads requested)" %
                      args.num_procs)

    if not args.raters:
        print("No raters are loaded.")
        args.list_raters = True

    if args.list_raters:
        utils.print_raters_list(args.verbosity)
        sys.exit(0)

    rat_inst_id_cache = utils.RatingInstanceIDCache(DBNAME)
    loaded_raters = {}
    for rater_name in args.raters:
        rater = getattr(raters, rater_name).Rater()
        loaded_raters[(rater.long_name, rater.version)] = rater

    # For candidates with a given header_id find which current ratings
    # are not computed.
    #
    # NOTE: We use 'r.pdm_rating_instance_id' in the WHERE clause
    # because it will be NULL if a rating does not exist in
    # the 'pdm_rating' table. However, it _will_ be set if the rating
    # exists, but has a value of NULL (i.e. the rating failed).
    # If we used 'r.value' instead, we would try to re-compute
    # failed ratings.
    missing_query = (
        "SELECT c.pdm_cand_id, "
            "rt.name, "
            "ri.version "
        "FROM pdm_candidates AS c WITH(NOLOCK) "
        "CROSS JOIN (SELECT rt.pdm_rating_type_id, "
                        "MAX(ri.pdm_rating_instance_id) "
                            "AS current_instance_id "
                    "FROM pdm_rating_instance AS ri WITH(NOLOCK) "
                    "LEFT JOIN pdm_rating_type AS rt WITH(NOLOCK) "
                        "ON ri.pdm_rating_type_id=rt.pdm_rating_type_id "
                    "GROUP BY rt.pdm_rating_type_id) AS ci "
        "LEFT JOIN pdm_rating_instance AS ri WITH(NOLOCK) "
            "ON ri.pdm_rating_instance_id=ci.current_instance_id "
        "LEFT JOIN pdm_rating AS r WITH(NOLOCK) "
            "ON r.pdm_cand_id=c.pdm_cand_id "
                "AND ri.pdm_rating_instance_id=r.pdm_rating_instance_id "
        "LEFT JOIN pdm_rating_type AS rt WITH(NOLOCK) "
            "ON rt.pdm_rating_type_id=ri.pdm_rating_type_id "
        "WHERE c.header_id=? AND r.pdm_rating_instance_id IS NULL"
    )
    insert_header = "INSERT INTO pdm_rating " + \
                    "(value, pdm_rating_instance_id, pdm_cand_id, date) "

    db = database.Database(DBNAME)
    try:
        for rater in loaded_raters.values():
            rating_instance_id = rat_inst_id_cache.get_id(rater.long_name,
                                                          rater.version,
                                                          rater.description)
            header_ids = get_beams_to_rate(rating_instance_id)
            print("For rater %s have %d beams to rate." %
                  (rater.long_name, len(header_ids)))

            for header_id in header_ids:
                db.execute(missing_query, header_id)
                missing_ratings = db.fetchall()

                if not missing_ratings:
                    raise utils.RatingError("At least the current rating (%s) should " \
                                        "be missing for header_id=%d. (This is how the header "
                                        "IDs were selected.)" % (rater.long_name, header_id))

                # Get pfds for this header_id
                if DBNAME == 'common2' or DBNAME == 'common3':
                    tmpdir, fn_mapping = get_pfds_from_ftp(header_id)
                else:
                    tmpdir, fn_mapping = get_pfds_from_db(header_id)

                try:
                    rated_cands = []
                    # Rate pfds for this header_id, using only the loaded
                    # raters whose rating is missing for each candidate.
                    for cand_id, pfd_fn in fn_mapping.items():
                        raters_to_use = [loaded_raters[(x[1], x[2])]
                                         for x in missing_ratings
                                         if x[0] == cand_id
                                         and (x[1], x[2]) in loaded_raters]
                        cand = rate_pfd(os.path.join(tmpdir, pfd_fn),
                                        raters_to_use)

                        # Add candidate ID number to facilitate uploading
                        cand.id = cand_id
                        rated_cands.append(cand)

                    # Upload rating values: one multi-row INSERT per
                    # candidate, built from "SELECT ..." rows joined with
                    # UNION ALL.
                    for cand in rated_cands:
                        if len(cand.rating_values) == 0:
                            continue

                        select_rows = []
                        for ratval in cand.rating_values:
                            if ratval.value is not None:
                                # Flush values too small to store to zero.
                                if np.abs(ratval.value) < 1e-307:
                                    ratval.value = 0.0
                                # NOTE(review): both +inf and -inf are
                                # stored as 9999.0 -- confirm this is the
                                # intended sentinel for negative infinity.
                                if np.isinf(ratval.value):
                                    ratval.value = 9999.0

                            instance_id = rat_inst_id_cache.get_id(
                                ratval.name, ratval.version,
                                ratval.description)

                            if ratval.value is None:
                                value = None
                            else:
                                value = float(ratval.value)

                            # Missing and NaN ratings are stored as NULL.
                            if value is None or np.isnan(value):
                                select_rows.append(
                                    "SELECT NULL, %d, %d, GETDATE()" %
                                    (instance_id, cand.id))
                            else:
                                select_rows.append(
                                    "SELECT '%.12g', %d, %d, GETDATE()" %
                                    (value, instance_id, cand.id))

                        db.execute(insert_header +
                                   " UNION ALL ".join(select_rows))

                finally:
                    # Remove the temporary directory containing pfd files
                    shutil.rmtree(tmpdir)
    finally:
        db.close()
Example #4
0
def main():
    """Compute the ratings missing from the database and upload them.

    For every loaded rater, the header IDs returned by
    ``get_beams_to_rate`` are processed one at a time: the candidates of
    that header which lack a row in ``pdm_rating`` for the current rating
    instances are looked up, their pfd files are fetched into a temporary
    directory, rated with ``rate_pfd`` and the resulting values are
    inserted into ``pdm_rating`` with a single parameterized
    ``executemany``.  The temporary directory is removed and the database
    connection closed even if an error occurs.

    Configuration comes from the module-level ``args`` object
    (``num_procs``, ``raters``, ``list_raters``, ``verbosity``) and the
    module-level ``DBNAME``.

    Raises:
        utils.RatingError: if a header ID selected for rating turns out to
            have no missing ratings.
    """
    # Imported locally so this function does not depend on a module-level
    # import of the standard ``warnings`` module.
    import warnings

    if args.num_procs > 1:
        warnings.warn("Multithreading not implemnted (%d threads requested)" %
                      args.num_procs)

    if not args.raters:
        print("No raters are loaded.")
        args.list_raters = True

    if args.list_raters:
        utils.print_raters_list(args.verbosity)
        sys.exit(0)

    rat_inst_id_cache = utils.RatingInstanceIDCache(DBNAME)
    loaded_raters = {}
    for rater_name in args.raters:
        rater = getattr(raters, rater_name).Rater()
        loaded_raters[(rater.long_name, rater.version)] = rater

    # For candidates with a given header_id find which current ratings
    # are not computed.
    #
    # NOTE: We use 'r.pdm_rating_instance_id' in the WHERE clause
    # because it will be NULL if a rating does not exist in
    # the 'pdm_rating' table. However, it _will_ be set if the rating
    # exists, but has a value of NULL (i.e. the rating failed).
    # If we used 'r.value' instead, we would try to re-compute
    # failed ratings.
    missing_query = (
        "SELECT c.pdm_cand_id, "
            "rt.name, "
            "ri.version "
        "FROM pdm_candidates AS c WITH(NOLOCK) "
        "CROSS JOIN (SELECT rt.pdm_rating_type_id, "
                        "MAX(ri.pdm_rating_instance_id) "
                            "AS current_instance_id "
                    "FROM pdm_rating_instance AS ri WITH(NOLOCK) "
                    "LEFT JOIN pdm_rating_type AS rt WITH(NOLOCK) "
                        "ON ri.pdm_rating_type_id=rt.pdm_rating_type_id "
                    "GROUP BY rt.pdm_rating_type_id) AS ci "
        "LEFT JOIN pdm_rating_instance AS ri WITH(NOLOCK) "
            "ON ri.pdm_rating_instance_id=ci.current_instance_id "
        "LEFT JOIN pdm_rating AS r WITH(NOLOCK) "
            "ON r.pdm_cand_id=c.pdm_cand_id "
                "AND ri.pdm_rating_instance_id=r.pdm_rating_instance_id "
        "LEFT JOIN pdm_rating_type AS rt WITH(NOLOCK) "
            "ON rt.pdm_rating_type_id=ri.pdm_rating_type_id "
        "WHERE c.header_id=? AND r.pdm_rating_instance_id IS NULL"
    )
    insert_query = "INSERT INTO pdm_rating " \
                   "(value, pdm_cand_id, pdm_rating_instance_id, date) " \
                   "VALUES (?, ?, ?, GETDATE())"

    db = database.Database(DBNAME)
    try:
        for rater in loaded_raters.values():
            rating_instance_id = rat_inst_id_cache.get_id(rater.long_name,
                                                          rater.version,
                                                          rater.description)
            header_ids = get_beams_to_rate(rating_instance_id)
            for header_id in header_ids:
                db.execute(missing_query, header_id)
                missing_ratings = db.fetchall()

                if not missing_ratings:
                    raise utils.RatingError("At least the current rating should " \
                                        "be missing. (This is how the header "
                                        "IDs were selected.)")

                # Get pfds for this header_id
                if DBNAME == 'common2':
                    tmpdir, fn_mapping = get_pfds_from_ftp(header_id)
                else:
                    tmpdir, fn_mapping = get_pfds_from_db(header_id)

                try:
                    rated_cands = []
                    # Rate pfds for this header_id, using only the loaded
                    # raters whose rating is missing for each candidate.
                    for cand_id, pfd_fn in fn_mapping.items():
                        raters_to_use = [loaded_raters[(x[1], x[2])]
                                         for x in missing_ratings
                                         if x[0] == cand_id
                                         and (x[1], x[2]) in loaded_raters]
                        cand = rate_pfd(os.path.join(tmpdir, pfd_fn),
                                        raters_to_use)

                        # Add candidate ID number to facilitate uploading
                        cand.id = cand_id
                        rated_cands.append(cand)

                    # Upload rating values: one parameter tuple per rating,
                    # in the column order of insert_query.
                    query_args = []
                    for cand in rated_cands:
                        for ratval in cand.rating_values:
                            instance_id = rat_inst_id_cache.get_id(
                                ratval.name, ratval.version,
                                ratval.description)
                            query_args.append((ratval.value, cand.id,
                                               instance_id))
                    if query_args:
                        db.executemany(insert_query, query_args)
                finally:
                    # Remove the temporary directory containing pfd files
                    shutil.rmtree(tmpdir)
    finally:
        db.close()