Example #1
def start_loop(stream_name=None,
               network='s3fd',
               redis_host='localhost',
               redis_port=6379):
    """
    Starts the processing loop
    """
    killer = GracefulKiller()

    LOG.info('Initializing Redis cache...')
    cache = redis.StrictRedis(host=redis_host, port=redis_port, db=0)

    LOG.info('Initializing annotator...')
    annotator = Annotator(network=network)

    LOG.info('Starting processing loop...')
    while True:
        # Get next available frame from the cache
        image_bytes = cache.get('{}_raw'.format(stream_name))
        if image_bytes is None:
            continue

        # Annotate with bounding boxes
        # TODO: Get size from feed configs
        image = Image.frombytes('RGB', (1920, 1080), image_bytes)
        image = annotator.annotate(image)

        # Save frame to cache as raw bytes
        cache.set('{}_annotated'.format(stream_name), image.tobytes())

        # Capture kill signals and terminate loop
        if killer.kill_now:
            LOG.info('Shutting down gracefully...')
            break
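
For context, here is a minimal producer sketch that fills the '<stream>_raw' key this loop polls. It is not part of the example: the OpenCV capture source is an assumption, and frames must be resized to the (1920, 1080) size hard-coded above.

import cv2
import redis

def publish_frames(stream_name, device=0, redis_host='localhost', redis_port=6379):
    # Hypothetical producer for start_loop(): store each frame as raw RGB bytes
    # under the same '<stream>_raw' key the consumer reads.
    cache = redis.StrictRedis(host=redis_host, port=redis_port, db=0)
    cap = cv2.VideoCapture(device)
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        frame = cv2.resize(frame, (1920, 1080))         # match the consumer's frame size
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # PIL expects RGB byte order
        cache.set('{}_raw'.format(stream_name), frame.tobytes())
    cap.release()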
Example #2
def executeAnnotator(logger, startTime, datetime, confpath, pattern, paths):
    print("execute Annotator")
    momentum = datetime.now()
    now = momentum - startTime
    units = None
    for path in paths:
        mags, results = readKataMagazines(logger)
        results, magazines, minl, maxl = readFile(logger, pattern, now, path,
                                                  mags)
        annotator = Annotator(None, confpath)
        units = annotator.doAnnotationWithConfig(results, mags, magazines)
        momentum = logQueryData(logger, momentum, units, None)
        print("execute Annotator")

    writeResultsToRDF(units)

    now = datetime.now() - momentum
    end = datetime.now() - startTime

    print("Finished queries in " + str(now))
    print("REACHED THE END in " + str(end))
    logger.info("Application execution ended, and it lasted for " + str(end))
Example #3
 def __init__(self, switch):
     self.switch = switch.lower()
     Annotator.__init__(self, "sift")
     self.input_file = "%s/input/%s.csv" % (self.root_dir, switch)
     self.output_dir = "%s/output" % (self.root_dir)
     self.indexOf = {"coordinates": 0,
                     "codons": 1,
                     "transcript id": 2,
                     "protein id": 3,
                     "substitutions": 4,
                     "region": 5,
                     "dbsnp id": 6,
                     "snp type": 7,
                     "prediction": 8,
                     "score": 9,
                     "median info": 10,
                     "# seqs at position": 11,
                     "gene id": 12,
                     "gene name": 13,
                     "gene desc": 14,
                     "omim disease": 15,
                     "average allele freqs": 16,
                     "ceu allele freqs": 17,
                     "user comment": 18}

     self.cols = ["region", "snp type", "prediction", "score", "omim disease"]
     self.db_cols = ["sift_region", "sift_type", "sift_prediction", "sift_score", "sift_omim"]
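
The indexOf map above records where each SIFT output column sits in a parsed CSV row. A small illustrative helper (not from the source) showing how such a map can pull out the columns named in self.cols:

def extract_columns(row, index_of, cols):
    # row: one CSV line already split into fields; returns only the
    # columns of interest, keyed by their readable names.
    return {col: row[index_of[col]] for col in cols}

# e.g. extract_columns(line.rstrip('\n').split(','), self.indexOf, self.cols)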
Example #4
 def annotate_data(self, nr_docs=-1):
     self.logger.println("data annotator called")
     start_time = timeit.default_timer()
     annotator = Annotator()
     annotator.prepare_dataset(nr_docs)
     elapsed_seconds = timeit.default_timer() - start_time
     self.logger.print_time_taken("data annotation operation took",
                                  elapsed_seconds)
Example #5
    def generate_annotator(self, event):
        annotation = Annotator(self.channel_fnames, self.probability_table,
                               self.output_dir)
        new_table = annotation.annotate([self.x0, self.y0], [self.x1, self.y1])

        # add the new table to the groundtruth table
        self.groundtruth_table.loc[
            new_table.index,
            self.column_names] = new_table.loc[:, self.column_names]
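
The .loc assignment above copies only the freshly annotated rows into the ground-truth table, aligning on the new table's index and columns. A tiny self-contained pandas illustration of the same pattern:

import pandas as pd

gt = pd.DataFrame({'label': ['a', 'b', 'c']}, index=[0, 1, 2])
new = pd.DataFrame({'label': ['B', 'C']}, index=[1, 2])

# Only rows 1 and 2 are overwritten; row 0 keeps its old value.
gt.loc[new.index, ['label']] = new.loc[:, ['label']]
print(gt['label'].tolist())  # ['a', 'B', 'C']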
Example #6
def run_annotator():
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--source-files',
                        dest='source_files',
                        type=str,
                        nargs='+',
                        help='')
    args = parser.parse_args()
    anno = Annotator()
    anno.annotate(args.source_files)
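
Because run_annotator() parses sys.argv, it can also be exercised programmatically. The script name and file list below are placeholders:

import sys

sys.argv = ['annotate.py', '--source-files', 'src/a.py', 'src/b.py']
run_annotator()  # equivalent to: anno.annotate(['src/a.py', 'src/b.py'])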
Example #7
class TestAnnotator(unittest.TestCase):
    def setUp(self):
        self.annotator = Annotator(TestFiles.make_io_helper())

    def test_can_create(self):
        assert self.annotator

    def test_creates_prefixes_without_information(self):
        self.assertEqual(self.annotator.save_as_turtle(as_string=True),
                         TestFiles.PREFIXES)

    def test_includes_added_organism_into_turtle(self):
        self.annotator.add_bug(*TestFiles.BOUNDING_BOX)
        self.assertEqual(self.annotator.save_as_turtle(as_string=True),
                         TestFiles.make_rdf_file())
Example #8
    def __init__(self, dock_area, params_tree, config):
        self.dock_area = dock_area
        self.db = ParameterDB()
        self.curves = {}
        self.qtree_widget_items = {}
        self.plot_widget_items = []
        # self.params = {}    # [Parameter, Plot, Curve, QTreeWidgetItem]
        self.params_tree = params_tree
        self.last_dock_added = None
        self.xlink = None
        self.annotators = {
            a['param']: Annotator(a['param'], a)
            for a in config.get('annotators', [])
        }

        self.auto_pens = [
            pg.mkPen('#f00'),
            pg.mkPen('#0f0'),
            pg.mkPen('#00f'),
            pg.mkPen('#AA0'),
            pg.mkPen('#0AA'),
            pg.mkPen('#A0A'),
        ]

        self.initialize_plots(config)
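
The dict comprehension above keys each Annotator by its 'param' entry, so every annotator config needs at least that key. A hedged sketch of the expected config shape; field names other than 'annotators' and 'param' are illustrative:

config = {
    'annotators': [
        {'param': 'temperature', 'threshold': 30.0},  # the whole dict is passed through as `a`
        {'param': 'pressure'},
    ],
    # ...plus whatever plot configuration initialize_plots() consumes...
}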
Example #10
def default_annotator(mix_path, src_track_paths):
    from nodes import Unmixer, GenericProvider, SegmentFinder, XFadeFinder, Fingerprinter
    from annotator import Annotator
    return Annotator([
        GenericProvider(src_track_paths=src_track_paths, mix_path=mix_path),
        Fingerprinter(lazy=False),
        Unmixer(),
        XFadeFinder(),
        SegmentFinder()
    ])
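
Hypothetical use of the factory above; the paths are placeholders, and the node classes come from the project's own nodes module:

annotator = default_annotator(
    mix_path='mixes/set01.wav',
    src_track_paths=['tracks/intro.wav', 'tracks/drop.wav'],
)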
Example #11
    def test_sanity_check(self):
        """A sanity check test just to be sure that nothing is broken when
        making some change to the code."""

        with open('tests/report.txt') as f:
            text = f.read()

        annot = Annotator.RadlexAnnotator()
        annotations = annot.annotate(text)

        self.assertEqual(77, len(annotations))
Example #12
    def handle_resume_post(self):
        # Get the uploaded file from the request
        file = request.files['file']

        if file and self.__allowed_file(file.filename):
            # Save file to upload folder
            file.save(os.path.join(self.__app.config['UPLOAD_FOLDER'], file.filename))

            # Annotate the resume using the trained CRF model
            annotator = Annotator()
            annotated_resume = annotator.annotate_using_trained_model(
                self.__app.config['UPLOAD_FOLDER'] + self.__seperator +
                file.filename)

            tagged_resume = self.__crfsuite.tag_doc(annotated_resume)

            template = render_template('%s.xml' % self.__path_output_xml, entities=tagged_resume)
            response = make_response(template)
            response.headers['Content-Type'] = 'application/xml'

            return response

        else:
            return "Invalid file type, use PDF, DOC or DOCX", 406
Example #13
class Caption(Resource):
    """REST end point for the captioning service.
    
    Accepts images (via HTTP POST requests) and returns corresponding captions.
    """
    def __init__(self):
        self.annotator = Annotator()

    def post(self):
        """Create caption for an image.
        
        The POST request should include either an image or a url pointing to an image.

        Parameters
        ----------
        image
            Image data sent with the request.
        url
            URL of the image to be processed.
        """
        parser = reqparse.RequestParser()
        parser.add_argument("image",
                            type=datastructures.FileStorage,
                            location='files')
        parser.add_argument("url")
        args = parser.parse_args()
        if args.image:
            image = Image.open(args.image)
        else:
            ssl._create_default_https_context = ssl._create_unverified_context
            with urllib.request.urlopen(args.url) as url:
                f = io.BytesIO(url.read())

            image = Image.open(f)

        annotation = self.annotator.annotate(image)
        data = {
            "annotation": annotation,
        }
        return data, 201
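
A client-side sketch for the endpoint above. The route ('/caption') and port are assumptions, since the resource registration is not shown; both upload styles the handler accepts are illustrated:

import requests

# 1) Send the image itself as a multipart upload
with open('photo.jpg', 'rb') as fh:
    resp = requests.post('http://localhost:5000/caption', files={'image': fh})
print(resp.status_code, resp.json()['annotation'])

# 2) Or pass a URL and let the server fetch the image
resp = requests.post('http://localhost:5000/caption',
                     data={'url': 'https://example.com/photo.jpg'})
print(resp.json()['annotation'])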
Example #14
from fastapi import FastAPI
from annotator import Annotator
from starlette.responses import JSONResponse
from starlette.middleware.cors import CORSMiddleware
from pydantic import BaseModel

annotator = Annotator()
app = FastAPI()
origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_methods=['*'],
    allow_headers=['*'],
)


class Text(BaseModel):
    text: str


@app.post('/annotate_text')
def annotate_text(text: Text):
    return JSONResponse(annotator.annotate(text.text))


@app.get('/', status_code=200)
async def healthcheck():
    return 'Annotator is ready!'
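
Exercising the service above, assuming it is started with something like `uvicorn main:app --port 8000` (the module name is a placeholder):

import requests

resp = requests.post('http://localhost:8000/annotate_text',
                     json={'text': 'Some text to annotate.'})
print(resp.status_code, resp.json())

# The healthcheck returns a bare JSON string
assert requests.get('http://localhost:8000/').json() == 'Annotator is ready!'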
Example #15
 def setUp(self):
     self.annotator = Annotator(TestFiles.make_io_helper())
Example #17
def build_app(config_file):
    global annotatr
    annotatr = Annotator.from_config(config_file)
    return app
Example #18
    # make Qt logs go to logger
    QtCore.qInstallMessageHandler(qt_message_handler)
    logger = logging.getLogger(__name__)
    logger.info('Annotator %s starting on %s (%s)', '0.0.0.0', platform.system(), sys.platform)

    if platform.system() == 'Windows':
        logger.info('Applying Windows-specific setup')

        # enable automatic scaling for high-DPI screens
        os.environ['QT_AUTO_SCREEN_SCALE_FACTOR'] = '1'

        # set the App ID for Windows 7 to properly display the icon in the
        # taskbar.
        import ctypes
        myappid = '??????'  # arbitrary string
        try:
            ctypes.windll.shell32.SetCurrentProcessExplicitAppUserModelID(myappid)
        except Exception:
            logger.error('Could not set the app model ID. If the platform '
                         'is older than Windows 7, this is normal.')

    elif platform.system() == 'Darwin':
        pass
        # logger.info('Applying Mac OS-specific setup')

    app = QApplication(sys.argv)
    window = Annotator()
    window.show()
    return_code = app.exec_()
    del window  # prevent mac errors
    sys.exit(return_code)
Example #19
    ap.add_argument("-v",
                    "--version",
                    dest="version",
                    default=False,
                    action="store_true",
                    help="displays the current version of the application")

    return ap.parse_args()


if __name__ == "__main__":
    arguments = parse_args()

    if arguments.test:
        doctest.testmod()  # unit testing
        sys.exit()

    # displays version of the program
    if arguments.version:
        sys.exit("{} {}".format(APP_TITLE, VERSION_NUMBER))

    # creates the annotation engine
    annotator = Annotator()

    if arguments.fullscreen:
        annotator.toggle_fullscreen()  # toggles fullscreen

    annotator.parent.protocol("WM_DELETE_WINDOW",
                              annotator.exit_prompt)  # quit event handler
    annotator.mainloop()  # runs the main tkinter loop
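
The __main__ block above references arguments.test and arguments.fullscreen, which the visible part of parse_args() never defines. A hedged reconstruction of those two flags in the same style as the --version flag; the short options and help strings are assumptions:

    ap.add_argument("-t",
                    "--test",
                    dest="test",
                    default=False,
                    action="store_true",
                    help="runs the doctests and exits")
    ap.add_argument("-f",
                    "--fullscreen",
                    dest="fullscreen",
                    default=False,
                    action="store_true",
                    help="starts the annotator in fullscreen mode")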
Example #20
    # Hyperparameters; modify to change the behaviour of the model:
    # l sets the relative importance of the foreground and background probability terms
    # num_bins sets how many bins to use in the histograms
    l = 1
    num_bins = 2

    # Get image
    if len(sys.argv) == 2:
        imgURL = sys.argv[1]
    else:
        imgURL = "http://www.python.org/static/community_logos/python-logo.png"
    img = getImage(imgURL)

    # Annotate it with foreground and background points
    annotator = Annotator(img)

    # extract foreground and background points
    fg = annotator.fg
    bg = annotator.bg

    if (len(fg) == 0) or (len(bg) == 0):
        print("No Foreground or Background points selected, exiting")
        sys.exit()

    # Start timing (time.clock() was removed in Python 3.8)
    start = time.perf_counter()

    print("Converting to grayscale and computing statistics")
    # Compute intensities (grayscale) and collect statistics
    gray = rgb2gray(img)
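
A sketch of the statistics step this fragment leads into: per-class intensity histograms over the user-selected points, using num_bins from the hyperparameters above. The (row, col) point format is an assumption:

import numpy as np

# rgb2gray() yields intensities in [0, 1]
fg_vals = np.array([gray[r, c] for r, c in fg])
bg_vals = np.array([gray[r, c] for r, c in bg])

fg_hist, bin_edges = np.histogram(fg_vals, bins=num_bins, range=(0.0, 1.0), density=True)
bg_hist, _ = np.histogram(bg_vals, bins=num_bins, range=(0.0, 1.0), density=True)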
Example #21
            # Save the clip
            print('\rClip %d complete' % clip_counter, end=' ')
            clip.release()
            clip_time = 0
            clip_counter += 1
            clip = cv2.VideoWriter(
                os.path.join(clips_folder, 'clip_%04d.mp4' % clip_counter),
                fourcc, fps, (fdim[1], fdim[0]))

        if video_time < n_frames - 1:
            video_time += 1
        else:
            cap.release()
            break

# Run the annotator
annotator = Annotator([{
    'name': 'result_table',
    'color': (0, 1, 0)
}, {
    'name': 'olympics_logo',
    'color': (0, 0, 1)
}, {
    'name': 'stretching',
    'color': (0, 1, 1)
}],
                      clips_folder,
                      N_show_approx=100,
                      annotation_file='demo_labels.json')

annotator.main()
Example #22
if not os.path.exists(clips_folder):
    os.mkdir(clips_folder)

# Initialise the annotator
annotator = Annotator(
    # [
    # {'name': 'clarity 100', 'color': (0, 255, 0)},
    # {'name': 'clarity 80', 'color': (0, 0, 255)},
    # {'name': 'clarity 60', 'color': (0, 255, 255)},
    # {'name': 'clarity 40', 'color': (255, 100, 0)},
    # {'name': 'clarity 20', 'color': (0, 100, 255)}],
    [
    {'name': '1', 'color': (0, 255, 0)},
    {'name': '2', 'color': (0, 0, 255)},
    {'name': '3', 'color': (0, 255, 255)},
    {'name': '4', 'color': (255, 100, 0)},
    {'name': '5', 'color': (0, 100, 255)},
    {'name': '6', 'color': (0, 100, 50)},
    {'name': '7', 'color': (0, 150, 100)},
    {'name': '8', 'color': (50, 100, 255)},
    {'name': '9', 'color': (100, 50, 50)},
    {'name': '10', 'color': (50, 100, 150)},
    {'name': '11', 'color': (100, 100, 200)}
    ],

    clips_folder, sort_files_list=True, N_show_approx=20, screen_ratio=16 / 9,
    image_resize=1, loop_duration=None, annotation_file='ourdata_section.json')

# Split the video into clips
print('Generating clips from the video...')
annotator.video_to_clips(ourdata_filename, clips_folder, clip_length=150, overlap=0, resize=1)
Example #23
labels = [{
    'name': 'sluty',
    'color': (0, 0, 1)
}, {
    'name': 'sexy',
    'color': (0, 1, 1)
}, {
    'name': 'normal',
    'color': (0, 1, 0)
}]
# Initialise MuViLab

name = 'H.mp4'
fn = 'E:\\download\\593310496_saturdays85\\test\\%s' % name

clips_folder = './%s' % name.rsplit('.', 1)[0]
# Split the main video into clips
annotator = Annotator(labels,
                      clips_folder,
                      annotation_file='%s.json' % name,
                      N_show_approx=10)
import os
if not os.path.exists(clips_folder):
    os.mkdir(clips_folder)
    annotator.video_to_clips(fn,
                             clips_folder,
                             clip_length=1200,
                             overlap=0,
                             resize=0.5)
# Run the GUI
annotator.main()
Example #24
    def get_ies_scores(self):
        extractor = Extractor()
        ies_filenames = extractor.populate_file_names(self.__ies_accuracy_test)
        ies_filenames = extractor.filter_by_valid_exts(ies_filenames)
        filenames, resume_content = extractor.read_resume_content_tika_api(
            ies_filenames, self.__ies_accuracy_test)
        filenames, resume_content = extractor.remove_empty_resumes(
            filenames, resume_content)
        resume_labels = extractor.read_resume_labels(self.__ies_accuracy_test,
                                                     filenames)

        true_edu_insts = [
            extractor.get_edu_institutions(xml_tree)
            for xml_tree in resume_labels
        ]
        true_edu_majors = [
            extractor.get_edu_majors(xml_tree) for xml_tree in resume_labels
        ]
        true_emp_names = [
            extractor.get_company_names(xml_tree) for xml_tree in resume_labels
        ]
        true_emp_jtitles = [
            extractor.get_job_titles(xml_tree) for xml_tree in resume_labels
        ]

        cs = CrfSuite()
        cs.load_tagger()
        annotator = Annotator()
        annotated_resumes = [
            annotator.annotate_using_trained_model(self.__ies_accuracy_test +
                                                   self.__seperator +
                                                   filename[0] + filename[1])
            for filename in filenames
        ]
        predicted_entity_list = [
            cs.tag_doc(resume) for resume in annotated_resumes
        ]

        ies_edu_insts = [
            extractor.get_edu_institutions_from_list(entity_list)
            for entity_list in predicted_entity_list
        ]
        ies_edu_majors = [
            extractor.get_edu_major_from_list(entity_list)
            for entity_list in predicted_entity_list
        ]
        ies_emp_names = [
            extractor.get_company_names_from_list(entity_list)
            for entity_list in predicted_entity_list
        ]
        ies_emp_jtitles = [
            extractor.get_company_position_from_list(entity_list)
            for entity_list in predicted_entity_list
        ]

        tokeniser = Tokeniser()
        true_edu_insts = tokeniser.docs_tolower(
            tokeniser.tokenise_doclines_to_words(true_edu_insts))
        true_edu_majors = tokeniser.docs_tolower(
            tokeniser.tokenise_doclines_to_words(true_edu_majors))
        true_emp_names = tokeniser.docs_tolower(
            tokeniser.tokenise_doclines_to_words(true_emp_names))
        true_emp_jtitles = tokeniser.docs_tolower(
            tokeniser.tokenise_doclines_to_words(true_emp_jtitles))

        ies_edu_insts = tokeniser.docs_tolower(
            tokeniser.tokenise_doclines_to_words(ies_edu_insts))
        ies_edu_majors = tokeniser.docs_tolower(
            tokeniser.tokenise_doclines_to_words(ies_edu_majors))
        ies_emp_names = tokeniser.docs_tolower(
            tokeniser.tokenise_doclines_to_words(ies_emp_names))
        ies_emp_jtitles = tokeniser.docs_tolower(
            tokeniser.tokenise_doclines_to_words(ies_emp_jtitles))

        edu_insts_match_score = self.score_matches(ies_edu_insts,
                                                   true_edu_insts)
        edu_majors_match_score = self.score_matches(ies_edu_majors,
                                                    true_edu_majors)
        emp_names_match_score = self.score_matches(ies_emp_names,
                                                   true_emp_names)
        emp_jtitles_match_score = self.score_matches(ies_emp_jtitles,
                                                     true_emp_jtitles)
        print(edu_insts_match_score)
        print(edu_majors_match_score)
        print(emp_names_match_score)
        print(emp_jtitles_match_score)
Example #26
# Clips folder for this video
clips_folder = os.path.join(clips_folder,each)
annotation_path = os.path.join("annotation",each)

annotation_path_for_vid = os.path.exists(os.path.join(annotation_path,"labels.json"))
if not os.path.exists(annotation_path):
    os.makedirs(annotation_path)

# Initialise the annotator
annotator = Annotator([
        {'name': 'goal', 'color': (0, 255, 0)},
        {'name': 'others', 'color': (0, 0, 255)},
        {'name': 'startgame', 'color': (0, 255, 255)},
        {'name': 'endgame', 'color': (255, 255, 255)},
        {'name': 'replay_goal', 'color': (0, 0, 0)},
        {'name': 'resume', 'color': (64, 244, 226)},
        {'name': 'SOT', 'color': (66, 86, 244)},
        {'name': 'play', 'color': (244, 155, 65)},
        {'name': 'replay', 'color': (193, 52, 156)}],
        clips_folder, sort_files_list=True, N_show_approx=20, screen_ratio=16/9,
        image_resize=1, loop_duration=None,
        annotation_file=os.path.join(annotation_path, 'labels.json'))

if not os.path.exists(clips_folder):
    # Split the video into clips
    os.makedirs(os.path.join(clips_folder))
    print('Generating clips from the video...')
    annotator.video_to_clips(os.path.join(videos_folder,each), clips_folder, clip_length=60, overlap=0, resize=0.5)

if annotation_path_for_vid:
    if loadflag is None:
        resp = input("the annotations will be overwritten. continue? (y/n)")
Example #27
# Create the clips folder
clips_folder = 'test_overlap_clips'
if os.path.exists(clips_folder):
    shutil.rmtree(clips_folder)
os.makedirs(clips_folder)

# Test the annotator
from annotator import Annotator
# Initialise the annotator
annotator = Annotator([{
    'name': 'test_label_1',
    'color': (0, 1, 0)
}, {
    'name': 'test_label_2',
    'color': (0, 0, 1)
}, {
    'name': 'test_label_3',
    'color': (0, 1, 1)
}],
                      clips_folder,
                      loop_duration=2,
                      annotation_file='overlap_annotation.json',
                      status_file='overlap_status.json')
# Create the overlapping clips
annotator.video_to_clips('dummy_digits.mp4',
                         clips_folder,
                         resize=0.5,
                         overlap=0.5,
                         clip_length=6)
# Run!
annotator.main()
Example #28
def main():

    images_path = get_image_path_from_user()

    annotator = Annotator(categories=["dog", "cat"], images_path=images_path)
    annotator.begin_annotation()
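
A minimal sketch of the helper assumed above; the real get_image_path_from_user() is not shown:

def get_image_path_from_user():
    # Hypothetical stand-in: ask on stdin for the folder of images to annotate.
    return input('Path to the folder of images to annotate: ').strip()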
Example #29
    yt = YouTube('https://www.youtube.com/watch?v=VZvoufQy8qc')
    stream = yt.streams.filter(res='144p', mime_type='video/mp4').first()
    print('Downloading youtube file. This may take a while.\n' +
          'Let\'s be honest, this _will_ take a while...')
    stream.download(demo_folder, filename='youtube')

# Initialise the annotator
annotator = Annotator([{
    'name': 'result_table',
    'color': (0, 1, 0)
}, {
    'name': 'olympics_logo',
    'color': (0, 0, 1)
}, {
    'name': 'stretching',
    'color': (0, 1, 1)
}],
                      clips_folder,
                      sort_files_list=True,
                      N_show_approx=100,
                      screen_ratio=16 / 9,
                      image_resize=1,
                      loop_duration=None,
                      annotation_file='demo_labels.json')

# Split the video into clips
print('Generating clips from the video...')
annotator.video_to_clips(youtube_filename,
                         clips_folder,
                         clip_length=90,
                         overlap=0)
Example #30
def bccwj_pipeline(input_path, output_path):
    a = Annotator()
    file_names = glob.glob(os.path.join(input_path, '**/*.xml'),
                           recursive=True)
    docs = load_text(file_names, encoding='utf-8')
    save_file(docs, a, output_path)
Example #31
def main(argv):
    # Logging setup
    logger = logging.getLogger('myapp')
    hdlr = logging.FileHandler('/tmp/myapp.log')
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    hdlr.setFormatter(formatter)
    logger.addHandler(hdlr)
    logger.setLevel(logging.DEBUG)

    startTime = datetime.now()
    logger.info("Application execution started at " + str(startTime))

    #retrieve options for this job
    configfile, filepattern, inputfilepath, csvlogging, target_format, source_file, source_format, special = extractConfigurations(
        argv)

    momentum = datetime.now()
    now = momentum - startTime
    units = None
    counter = 1
    annotator = Annotator(None, configfile)

    full_list_magazines = None
    minm = 0
    maxm = 0
    print(filepattern)
    print(inputfilepath)
    print(configfile)
    #kata=True

    for path in inputfilepath:
        try:
            mags = results = None
            #print("Before anything")
            #print(full_list_magazines)
            if special == "kata":
                mags, results = readKataMagazines(logger)

            magazines = None
            results, magazines, min_len, max_len = readFile(logger, filepattern,
                                                            now, path, mags)
            minm, maxm = checkLength(min_len, minm, maxm, logger)
            minm, maxm = checkLength(max_len, minm, maxm, logger)
            #print("After reading files")
            #print(minm)
            #print(maxm)
            #print(results)
            #print(magazines)
            #print(full_list_magazines)
            u = annotator.doAnnotationWithConfig(results, mags, magazines,
                                                 csvlogging, units)
            #print("After annotating them")
            #print(magazines)
            if full_list_magazines is not None:
                for m in magazines:
                    if m not in full_list_magazines:
                        full_list_magazines.append(m)
                #print(magazines)
                #print(full_list_magazines)
            else:
                full_list_magazines = magazines

            #for m in full_list_magazines:
            #    print(m)

            if units is not None:
                #units.extend(u)
                for node in u:
                    if node not in units:
                        units.append(node)
            else:
                units = u
            logger.info("Processed " + str(counter) + "/" +
                        str(len(inputfilepath)))
            counter = counter + 1
            #print("After everything")
            #print(full_list_magazines)

            #if counter == 10:
            #remove this later
        except Exception as e:
            print("Error happened during execution: " + str(path))
            print("Error happened during execution: " + str(e))
            logger.warning(
                "Unexpected error while processing data %s: %s", path, e)
            error = traceback.format_exc()
            print(error.upper())

    if full_list_magazines is not None:
        writeTextOutput(full_list_magazines)
    else:
        logger.error("magaine list is empty!")

    rank = annotator.doRanking()
    tfidf, limit = logCandicates(logger, full_list_magazines, rank)
    #logger.debug(tfidf)
    if units is not None:
        print("Check ranking" + str(limit))
        rank, rank_range = annotator.doRanking()
        print(len(rank))
        if len(rank) > 0:
            limits = (minm, maxm)
            logger.info("Execute candidate ranking for " + str(rank) + " " +
                        str(limit))
            apply_weights(units, tfidf, rank, logger, limits, rank_range,
                          limit)
            #use when using ranges
            #apply_weights(units, tfidf, rank, logger, limits, rank_range)
        print("convert to rdf")
        writeResultsToRDF(units, annotator, counter, target_format,
                          source_file, source_format)
        writeXmlOutput(full_list_magazines)
        writeCSVOutputOfResults(full_list_magazines)
        annotator.writeToCSV(full_list_magazines)
        annotator.logConseptsByIndex(full_list_magazines)

        #writeResultsToRDF(u,annotator,counter, target_format, source_file, source_format)
    annotator.print_filtered_terms(full_list_magazines)
    annotator.print_included_terms(full_list_magazines)
    annotator.print_stats(full_list_magazines)

    now = datetime.now() - momentum
    end = datetime.now() - startTime

    print("Finished queries in " + str(now))
    print("REACHED THE END in " + str(end))
    logger.info("Application execution ended, and it lasted for " + str(end))
Example #32
def mainichi_pipeline(input_path, output_path):
    a = Annotator()
    file_names = glob.glob(os.path.join(input_path, '*.sgml'))
    docs = load_text(file_names, encoding='shift_jis')
    save_file(docs, a, output_path)
Example #33
 def __init__(self):
     self.annotator = Annotator()
Example #34
 def __init__(self):
     Annotator.__init__(self, "snpeff")
     self.indexOf = indexOf
Example #35
 def open_file(self, filename, revert=False):
   """This method reads a file's contents and inserts the text read into the text
   buffer. Several things are done while the file is read: Tags are applied
   according to the type of place, the previous choices are also read and, if
   any are found, they are applied to the tags found, so that progress can be
   halted and resumed in further sessions. An Annotator object is created (or
   retrieved, in case of a file that is simply being revisited in this
   session), which will hold the annotations of the user.
   """
   
   # Store the name of the file currently opened
   self.current_filename = filename
   
   # Create or retrieve the Annotator and TextBuffer objects assigned to this
   # file. If we are asked to revert, then do not run this block
   if filename in self.results and not revert:
      # We have already seen this file, so an Annotator and a TextBuffer
      # object were already constructed for it
     self.current_result = self.results[filename]
     self.current_buffer = self.current_result.buffer
     
     # When reusing these objects, we don't need to open the file again, but
     # simply to switch the text buffer associated to the text view. To do
     # this, we run the post_open_file() method, that also takes care of the
     # rest of the window. We don't need the pre_open_file() method because the
     # progress bar will never be needed.
     self.post_open_file()
     
     # No further processing needed
     return
   
   # Do not change the modified flag for this file
   self.backend = True
   
   # Create an empty TextBuffer to hold the text
   self.current_buffer = self.new_buffer()
   
   # If we are not reverting, then try to open the .ann file.
   if not revert:
      # Even if we are seeing this file for the first time, choices may already
      # have been made for it and saved to disk. In that case, retrieve the
      # Annotator object from the file on disk.
     
     ann_filename = ANNOTATOR_TEMPLATE % clean_filename(filename)
     try:
       fh = self.get_ann_file(ann_filename)
     except IOError:
       # The file does not exist or is unreadable, so we will not use it
       pass
     else:
       # The file was successfully opened. Give the file descriptor to the method
       # that creates a new instance of the Annotator object with the information
       # read from the file.
       self.current_result = Annotator.from_saved(fh, self.current_buffer,
                                                  self.current_filename)
       self.results[filename] = self.current_result
       
       # As above, no further processing of the file is needed; just user
       # interface stuff
       self.post_open_file()
       
       # Further changes are user-made, so they must be processed
       self.backend = False
       
       return
   
   # Start the Annotator object as an empty instance
   self.results[filename] = self.current_result = \
     Annotator(self.current_buffer, self.current_filename)
   
   # Prepare for the opening process.
   self.pre_open_file()
   
   # Get the contents of the file as nodes
   f = self.get_input_file(filename)
   nodes = xml_utils.extract_nodes(f)
   
   # Record the number of places
   n_places = sum(1 for i in nodes if i[1] != "text")
   place_index = 0
   
   for node in nodes:
     # node comes from the xml_utils.extract_nodes() function, which returns
     # several tuples. Each tuple describes a string of data in the file:
     # text, explicit places (with GeoNetID, ...) or implicit places
     
     text, type = node[0], node[1]
     
     # Types are either "text", "explicit" or "implicit", with everything
     # except "text" signaling a place tag
     is_place = type != "text"
     
     if is_place:
       # Store the original name found on the file
       original_text = text
       
       # The position of the current cursor is the place this piece of text
       # will be inserted on. We create a mark (whose position remains fixed
       # relative to its surroundings) because the rest of the text may change.
       # The mark is created with left gravity because more text will be added
        # by the method, but it must change to right gravity later on, so that
       # newly added text does not get inserted in the name of the place.
       start_iter = self.get_cursor_iter()
       start_mark = self.current_buffer.create_mark(None, start_iter, True)
       
       # We want to slightly change the visible text for implicit places
       if type == "implicit":
         text = "(" + text + ARROW + ")"
     
     # Insert the text in the current position of the cursor 
     self.current_buffer.insert_at_cursor(text)
     
     # When the node is a place, there are other things that must be done
     if is_place:
       # Put a mark on the end of the text, to signal the end of the place
       # name. This mark should have left gravity and remain so, because text
        # added after it must not modify the position of the mark relative to
        # the place.
       end_iter = self.get_cursor_iter()
       end_mark = self.current_buffer.create_mark(None, end_iter, True)
       
       # As explained above, we need to recreate the start_mark with right
       # gravity
       start_iter = self.current_buffer.get_iter_at_mark(start_mark)
       start_mark = self.current_buffer.create_mark(None, start_iter, False)
       
       # Now we need to retrieve from the database more information about the
       # place.
       if type == "explicit":
         # node[2] contains triples with domain (physical or administrative),
         # GeoNet ID and type of location respectively
         possibilities = [(int(i[1]), i[2]) for i in node[2]]
       
       elif type == "implicit":
         # When dealing with implicit places, we only have the name. Retrieve
         # the possible GeoNet IDs from the database
         possibilities = self.find_by_name(node[0])
       
       # We also want the municipality name of the place to be present in the
       # possibilities list
       possibilities = [(i, j, self.find_municipality(i))
                        for i, j in possibilities]
       
       # We now add all the information to the self.current_result object
       self.current_result.add(original_text, start_mark, end_mark, type,
                               possibilities)
       
       # Increase the index of the places and update the progress bar showing
       # how much of the file has been gathered and processed
       place_index += 1
       self.update_progress_bar(place_index, n_places)
   
   # Format the text to give cues about each place's status
   self.current_result.format_buffer()
     
   # After opening the file, several operations must be performed
   self.post_open_file()
   
   # Further changes are user-made, so they must be processed
   self.backend = False
Example #36
def test(nlp,
         src,
         gen,
         bert=False,
         print_annotations=False,
         print_latex=False,
         verbose=False):
    if print_annotations:
        print("source:", src[:50])
        print("summary:", gen[:50])
    src = nlp(src)
    gen = nlp(gen)
    if verbose:
        print("clusters:", src._.coref_clusters, gen._.coref_clusters)
    ce = CompoundEquivalency()
    spe = SpeakerPronounEquivalency()
    spe.register(src)
    spe.register(gen)
    kg = KnowledgeGraph(nlp,
                        use_bert=bert,
                        equivalencies=[ce, spe],
                        verbose=verbose)
    if print_annotations:
        annotator = Annotator(src, gen, latex=print_latex)
    kg.add_document(src)
    contained = 0
    contained_bert = 0
    missing = 0
    missing_verb = 0
    missing_actors = 0
    missing_acteds = 0
    contradiction = 0
    contradiction_bert = 0
    invalid_simplification = 0
    total = 0
    for token in gen:
        if token.pos_ == "VERB":
            total += 1
            relation = kg.get_relation(token)
            r = kg.query_relation(relation)
            if r[0] == KnowledgeGraph.entailment:
                if print_annotations:
                    print(util.format("contained", "blue", latex=print_latex),
                          "|", relation, "|", r[1])
                contained += 1
            if r[0] == KnowledgeGraph.entailment_bert:
                if print_annotations:
                    print(
                        util.format("contained (BERT)",
                                    "blue",
                                    latex=print_latex), "|", relation, "|",
                        r[1])
                contained_bert += 1
            if r[0] == KnowledgeGraph.contradiction_bert:
                if print_annotations:
                    print(
                        util.format("contradiction (BERT)",
                                    "red",
                                    latex=print_latex), "|", relation, "|",
                        r[1])
                contradiction_bert += 1
            elif r[0] == KnowledgeGraph.missing_dependencies:
                missing += 1
                if print_annotations:
                    print(
                        util.format("generic missing dependency",
                                    "yellow",
                                    latex=print_latex), "|", relation, "|",
                        r[1])
            elif r[0] == KnowledgeGraph.missing_actors:
                missing_actors += 1
                if print_annotations:
                    print(
                        util.format("missing actors",
                                    "magenta",
                                    latex=print_latex), "|", relation, "|",
                        r[1])
            elif r[0] == KnowledgeGraph.missing_acteds:
                missing_acteds += 1
                if print_annotations:
                    print(
                        util.format("missing acteds",
                                    "magenta",
                                    latex=print_latex), "|", relation, "|",
                        r[1])
            elif r[0] == KnowledgeGraph.missing_verb:
                missing_verb += 1
                if print_annotations:
                    print(
                        util.format("missing verb",
                                    "magenta",
                                    latex=print_latex), "|", relation, "|",
                        r[1])
            elif r[0] == KnowledgeGraph.invalid_simplification:
                invalid_simplification += 1
                if print_annotations:
                    print(
                        util.format("invalid simplification",
                                    "magenta",
                                    latex=print_latex), "|", relation, "|",
                        r[1])
            elif r[0] == KnowledgeGraph.contradiction:
                contradiction += 1
                if print_annotations:
                    print(
                        util.format("contradiction", "red", latex=print_latex),
                        "|", relation, "|", r[1])
            if print_annotations:
                annotator.annotate(relation, r)
    if print_annotations:
        annotated_document, annotated_summary = annotator.annotated()
        print("Document:", " ".join(annotated_document))
        print("Summary:", " ".join(annotated_summary))
    if total == 0:
        return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    return 100.0 * contained / total, \
            100.0 * contained_bert / total, \
            100.0 * missing / total, \
            100.0 * missing_verb / total, \
            100.0 * missing_actors / total, \
            100.0 * missing_acteds / total, \
            100.0 * contradiction / total, \
            100.0 * contradiction_bert / total, \
            100.0 * invalid_simplification / total