Beispiel #1
0
def read_augmented_corpus(args, verbose=True):
    """
    Load the 'unannotated' stage of the corpus located at ``args.augmented``.

    The file filter is built from a shallow copy of ``args`` on which
    ``annotator`` is reset to None. ``verbose`` is handed on to the
    reader's ``slurp`` call, whose result is returned.
    """
    # adjust a copy, not ``args`` itself
    filter_args = copy.copy(args)
    filter_args.annotator = None
    wanted = mk_is_interesting(filter_args,
                               preselected={'stage': ['unannotated']})

    corpus_reader = educe.stac.Reader(args.augmented)
    selected = corpus_reader.filter(corpus_reader.files(), wanted)
    return corpus_reader.slurp(selected, verbose)
Beispiel #2
0
def read_augmented_corpus(args, verbose=True):
    """
    Slurp the documents of the 'unannotated' stage from ``args.augmented``.

    Filtering uses a shallow copy of ``args`` with its ``annotator``
    attribute set to None; ``verbose`` is forwarded to ``slurp``.
    """
    stage_only = {'stage': ['unannotated']}

    # the annotator is cleared on the copy, not on ``args``
    unrestricted = copy.copy(args)
    unrestricted.annotator = None
    keep = mk_is_interesting(unrestricted, preselected=stage_only)

    aug_reader = educe.stac.Reader(args.augmented)
    chosen_files = aug_reader.filter(aug_reader.files(), keep)
    return aug_reader.slurp(chosen_files, verbose)
Beispiel #3
0
def _read_corpus_inputs(args):
    """
    Collect the inputs needed to extract features for the 'units' stage.

    Slurps the filtered files under ``args.corpus``, reads POS tags and
    CoreNLP results for them (both calls receive ``args.corpus``), loads
    ``LEXICON`` from ``args.resources`` and wraps everything in a
    ``FeatureInput``. The PDTB, VerbNet and Inquirer fields are left as None.
    """
    stage_filter = mk_is_interesting(args, preselected={"stage": ["units"]})
    corpus_reader = educe.stac.Reader(args.corpus)
    selected = corpus_reader.filter(corpus_reader.files(), stage_filter)
    docs = corpus_reader.slurp(selected, verbose=True)

    tags = postag.read_tags(docs, args.corpus)
    trees = corenlp.read_results(docs, args.corpus)
    # the lexicon is read before it is handed over
    LEXICON.read(args.resources)

    fields = dict(corpus=docs,
                  postags=tags,
                  parses=trees,
                  lexicons=[LEXICON],
                  pdtb_lex=None,
                  verbnet_entries=None,
                  inquirer_lex=None)
    return FeatureInput(**fields)
Beispiel #4
0
def _read_corpus_inputs(args):
    """
    Read the 'units' stage of ``args.corpus`` plus its companion resources.

    Returns a ``FeatureInput`` holding the slurped corpus, the POS tags and
    CoreNLP results read for it, and ``LEXICON`` (loaded from
    ``args.resources``) as the only lexicon; the remaining fields are None.
    """
    units_only = {"stage": ["units"]}
    keep = mk_is_interesting(args, preselected=units_only)

    rdr = educe.stac.Reader(args.corpus)
    wanted_files = rdr.filter(rdr.files(), keep)
    slurped = rdr.slurp(wanted_files, verbose=True)

    pos_tags = postag.read_tags(slurped, args.corpus)
    parse_results = corenlp.read_results(slurped, args.corpus)
    LEXICON.read(args.resources)

    return FeatureInput(
        corpus=slurped,
        postags=pos_tags,
        parses=parse_results,
        lexicons=[LEXICON],
        pdtb_lex=None,
        verbnet_entries=None,
        inquirer_lex=None,
    )
# ---------------------------------------------------------------------
# args
# ---------------------------------------------------------------------

# one positional input directory, plus the 'doc' corpus filter
arg_parser = argparse.ArgumentParser(description='Dump EDU text')
arg_parser.add_argument('idir', metavar='DIR', help='Input directory')
educe_group = arg_parser.add_argument_group('corpus filtering arguments')
util.add_corpus_filters(educe_group, fields=['doc'])
args = arg_parser.parse_args()
# the remaining filter fields are set here, not taken from the command line
args.subdoc = None
args.stage = 'unannotated'
args.annotator = None
is_interesting = util.mk_is_interesting(args)

# ---------------------------------------------------------------------
# main
# ---------------------------------------------------------------------

reader = educe.stac.Reader(args.idir)
anno_files = reader.filter(reader.files(), is_interesting)

trello = tr.TrelloApi(secrets.apikey, secrets.token)
board = trello.boards.get(board_id)
columns = trello.boards.get_list(board_id)
cards = trello.boards.get_card(board_id)
# collect the subdoc values of the selected files under their doc
subdocs = collections.defaultdict(list)
for k in anno_files:
    subdocs[k.doc].append(k.subdoc)
Beispiel #6
0
#            # then visit in your web browser to approve
#            # and paste in the resulting token

# ---------------------------------------------------------------------
# args
# ---------------------------------------------------------------------

arg_parser = argparse.ArgumentParser(
    description='Dump EDU text',
)
arg_parser.add_argument(
    'idir',
    metavar='DIR',
    help='Input directory',
)
# only the 'doc' filter is offered on the command line
educe_group = arg_parser.add_argument_group(
    'corpus filtering arguments',
)
util.add_corpus_filters(
    educe_group,
    fields=['doc'],
)
args = arg_parser.parse_args()

# the other filter fields are assigned directly
args.subdoc = None
args.stage = 'unannotated'
args.annotator = None
is_interesting = util.mk_is_interesting(args)

# ---------------------------------------------------------------------
# main
# ---------------------------------------------------------------------

reader = educe.stac.Reader(args.idir)
anno_files = reader.filter(
    reader.files(),
    is_interesting,
)

# board, lists and cards are fetched for the same board_id
trello = tr.TrelloApi(
    secrets.apikey,
    secrets.token,
)
board = trello.boards.get(board_id)
columns = trello.boards.get_list(board_id)
cards = trello.boards.get_card(board_id)

# subdoc values of the selected files, keyed by their doc
subdocs = collections.defaultdict(list)
for k in anno_files:
    subdocs[k.doc].append(k.subdoc)