Beispiel #1
0
if __name__ == "__main__":
    # Command line: a required config path plus optional overrides for the
    # dataset, the output location and the random seed.
    parser = argparse.ArgumentParser(description='Approximate inference PWEHAC')
    parser.add_argument('config', type=str, help='the config file')
    parser.add_argument('--test_file', type=str, help='the dataset to run on')
    parser.add_argument('--outbase', type=str,
                        help='prefix of out dir within experiment_out_dir')
    parser.add_argument('--canopy_name', type=str,
                        help='name of output canopy dir (only used with '
                             'test_file)')
    parser.add_argument('--points_file', type=str,
                        help='path to the points file to evaluate with')
    # The seed is parsed as a string so that the sentinel value 'config'
    # (the default) can mean "keep whatever the config file specifies".
    parser.add_argument('--random_seed', type=str, default='config',
                        help="The random seed to use, or 'config' to keep "
                             "the seed from the config file")
    args = parser.parse_args()

    config = Config(args.config)
    if args.test_file:
        # A single dataset given on the command line replaces the config's
        # list of test files and canopy output names.
        config.test_files = [args.test_file]
        config.out_by_canopy = [args.canopy_name]

    # Run label: the --outbase prefix when given, otherwise a
    # YYYY-MM-DD-HH-MM-SS timestamp of the current local time.
    if args.outbase:
        ts = args.outbase
    else:
        now = datetime.datetime.now()
        ts = "{:04d}-{:02d}-{:02d}-{:02d}-{:02d}-{:02d}".format(
            now.year, now.month, now.day, now.hour, now.minute, now.second)

    # An explicit --random_seed overrides the seed from the config file.
    if args.random_seed != 'config':
        config.random_seed = int(args.random_seed)

    rand = random.Random(config.random_seed)
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import sys

from coref.models import new_model
from coref.util.Config import Config
from coref.train import new_trainer

if __name__ == "__main__":
    # The config file path is taken from the first command-line argument.
    config = Config(sys.argv[1])
    # Build the model from the config, then a trainer for that model.
    model = new_model(config)
    trainer = new_trainer(config, model)

    def mention_pairs(filename):
        """Yield the first three tab-separated fields of each line of a file.

        Lines that do not have exactly three fields are reported on stdout
        (zero-based line index, then the raw line). A reported line with
        more than three fields still yields its first three fields; one with
        fewer than three is skipped because there is no complete triple to
        yield.

        Args:
            filename: path of the tab-separated text file to read.

        Yields:
            A 3-tuple of strings. Lines are split without stripping, so when
            a line has exactly three fields the third keeps its trailing
            newline.
        """
        with open(filename, 'r') as fin:
            for idx, line in enumerate(fin):
                splt = line.split('\t')
                if len(splt) != 3:
                    print("Error on line %s" % idx)
                    print(line)
                    if len(splt) < 3:
                        # Without this guard the yield below raised
                        # IndexError right after reporting the bad line.
                        continue
                yield splt[0], splt[1], splt[2]

    # Nothing is written when no trainer was created (trainer is None).
    if trainer is not None:
        # mention_pairs is a generator, so the pairs file is read lazily as
        # the trainer consumes it. The second argument is presumably the
        # output path for the written training data — confirm against the
        # trainer implementation.
        trainer.write_training_data(mention_pairs(config.pair_filename),
                                    config.batcher_filename)
Beispiel #3
0
import glob
import os
from shutil import copytree

from coref.util.Config import Config

if __name__ == "__main__":
    # Pass through to have the same format for baseline algs
    parser = argparse.ArgumentParser(description='Pass through')
    parser.add_argument('config', type=str, help='the config file')
    parser.add_argument('--outbase', type=str,
                        help='prefix of out dir within experiment_out_dir')
    parser.add_argument('--dataname', type=str, help='Name of dataset.')
    args = parser.parse_args()

    config = Config(args.config)

    # Run label: "<dataname>/<outbase>" when --outbase is given, otherwise a
    # YYYY-MM-DD-HH-MM-SS timestamp of the current local time.
    if args.outbase:
        ts = args.outbase
        dataname = args.dataname
        # --dataname is optional; os.path.join(None, ts) would raise
        # TypeError, so only prepend the dataset name when one was given.
        if dataname:
            ts = os.path.join(dataname, ts)
    else:
        now = datetime.datetime.now()
        ts = "{:04d}-{:02d}-{:02d}-{:02d}-{:02d}-{:02d}".format(
            now.year, now.month, now.day, now.hour, now.minute, now.second)

    debug = config.debug

    diagnostics = {}

    # Set up output dir
    config.experiment_out_dir = os.path.join(
Beispiel #4
0
    # Command line: a required config path plus optional overrides for the
    # output location, the points file and the random seed.
    parser = argparse.ArgumentParser(
        description='Approximate inference PWEHAC')
    parser.add_argument('config', type=str, help='the config file')
    parser.add_argument('--outbase',
                        type=str,
                        help='prefix of out dir within experiment_out_dir')
    parser.add_argument('--points_file',
                        type=str,
                        help='path to the points file to evaluate with')
    # The seed is parsed as a string so that the sentinel value 'config'
    # (the default) can mean "keep whatever the config file specifies".
    parser.add_argument('--random_seed',
                        type=str,
                        default='config',
                        help="The random seed to use, or 'config' to keep "
                             "the seed from the config file")
    args = parser.parse_args()

    config = Config(args.config)

    # Run label: the --outbase prefix when given, otherwise a
    # YYYY-MM-DD-HH-MM-SS timestamp of the current local time.
    if args.outbase:
        ts = args.outbase
    else:
        now = datetime.datetime.now()
        ts = "{:04d}-{:02d}-{:02d}-{:02d}-{:02d}-{:02d}".format(
            now.year, now.month, now.day, now.hour, now.minute, now.second)

    # An explicit --random_seed overrides the seed from the config file.
    if args.random_seed != 'config':
        config.random_seed = int(args.random_seed)

    print('Using random seed %s' % config.random_seed)
    debug = config.debug
    # Seed a dedicated generator and the module-level `random` functions
    # with the same value, and attach the dedicated generator to the config.
    rand = random.Random(config.random_seed)
    random.seed(config.random_seed)
    config.random = rand
Beispiel #5
0

if __name__ == "__main__":
    # Command line: a required config path plus optional overrides for the
    # dataset, the output location and the points file.
    parser = argparse.ArgumentParser(description='EHAC on dataset')
    parser.add_argument('config',
                        type=str,
                        help='the config file')
    parser.add_argument('--test_file',
                        type=str,
                        help='the dataset to run on')
    parser.add_argument('--outbase',
                        type=str,
                        help='prefix of out dir within experiment_out_dir')
    parser.add_argument('--canopy_name',
                        type=str,
                        help='name of output canopy dir '
                             '(only used with test_file)')
    parser.add_argument('--points_file',
                        type=str,
                        help='path to the points file to evaluate with')
    args = parser.parse_args()

    config = Config(args.config)
    if args.test_file:
        # A single dataset given on the command line replaces the config's
        # list of test files and canopy output names.
        config.test_files = [args.test_file]
        config.out_by_canopy = [args.canopy_name]

    # Run label: start from --outbase and fall back to a
    # YYYY-MM-DD-HH-MM-SS timestamp when it is missing or empty.
    ts = args.outbase
    if not ts:
        now = datetime.datetime.now()
        stamp = (now.year, now.month, now.day,
                 now.hour, now.minute, now.second)
        ts = "{:04d}-{:02d}-{:02d}-{:02d}-{:02d}-{:02d}".format(*stamp)

    debug = config.debug

    diagnostics = {}

    # Set up output dir
          "max_rec":
          "max_f1":
        }
    
    And a file: exp_out/rexa/2018-05-05-05-34-21/run_9/results/greedy/2018-05-05-11-00-06/run_9/results.tsv
    which has the same order of fields as above but is tab separated
    
    
    """
    # Command line: a single positional argument, the root directory of the
    # runs to evaluate.
    parser = argparse.ArgumentParser(description='Evalute a set of runs.')
    parser.add_argument('indir', type=str, help='the root of the runs')
    args = parser.parse_args()
    indir = args.indir

    # Only the first config.json that find_all returns is loaded; the short
    # name and dataset name are taken from it.
    # NOTE(review): configs[0] fails if find_all returns an empty sequence.
    configs = find_all('config.json',indir)
    config = Config(configs[0])
    short_name = short_name_from_config(config)
    dataset_name = config.dataset_name
    
    micro_f1_files = find_all('micro_f1_thresholded.tsv',indir)
    
    # read_f1 returns three values per file; going by the variable names
    # these are precision, recall and F1 — confirm against read_f1.
    precs,recs,f1s = [],[],[]
    for f in micro_f1_files:
        p,r,f1 = read_f1(f)
        precs.append(p)
        recs.append(r)
        f1s.append(f1)
    # Convert the per-file lists to numpy arrays.
    precs = np.array(precs)
    recs = np.array(recs)
    f1s = np.array(f1s)
    results = {