Example #1
0
    def __init__(self):
        """ Init the Detector.

        Loads the spaCy pipeline and the AlbertNER model, reads the movie
        database (keeping only rows whose ``actors`` column is not null) and
        builds one extractor per entity type, plus the filter and the checker.
        """
        # NER models
        self.model = spacy.load("en_core_web_lg")
        conll_dir = os.path.join(MODELS_PATH, "conll03")
        self.ner = AlbertNER(conll_dir)

        # Movie database used to check the extracted data; rows without
        # actors are discarded before anything else sees the dataframe.
        movies_csv = os.path.join(ASSETS_PATH, "movies.csv")
        movies = pd.read_csv(movies_csv)
        has_actors = movies.actors.notna()
        movies = movies.loc[has_actors]
        self.df_movies = movies

        # Extractors, all backed by the same filtered dataframe
        self.award_extractor = AwardsExtractor(movies)
        self.genre_extractor = GenreExtractor(movies)
        self.person_extractor = PersonExtractor(movies)
        self.rate_extractor = RateExtractor(movies)
        self.song_extractor = SongExtractor(movies)
        self.title_extractor = TitleExtractor(movies)
        self.trailer_extractor = TrailerExtractor(movies)
        self.year_extractor = YearExtractor(movies)

        # Order in which the extractors are kept for later iteration
        self.extractors = [
            self.award_extractor,
            self.genre_extractor,
            self.person_extractor,
            self.rate_extractor,
            self.song_extractor,
            self.title_extractor,
            self.trailer_extractor,
            self.year_extractor,
        ]

        # Filter, and the checker that is built from it and the dataframe
        self.filter = Filter()
        self.checker = Checker(self.filter, movies)
Example #2
0
    def post(self):
        """Take a screenshot of the requested URL and send it back as an image.

        The ``token`` query argument is verified against ``KEYS_PATH``. The
        JSON body supplies ``url`` and an ``options`` object (``fullPage``,
        ``type``, ``quality``, ``tor``, ``timeout``, ``browser``, ``height``,
        ``width``). On any failure a dict with ``error`` and
        ``error-description`` is returned instead of the image.
        """
        token = request.args.get('token')
        verify = VerifyKey(KEYS_PATH, token)
        if not verify.isAuthorized():
            unauthorized = Errors.UNAUTHORIZED
            return {'error': unauthorized.first, 'error-description': unauthorized.second}

        url = requote_uri(request.json.get('url'))

        options = request.json['options']
        full = options.get('fullPage')
        formatType = options.get('type')
        quality = options.get('quality')
        tor = options.get('tor')
        timeout = options.get('timeout')
        browser = options.get('browser')
        height = options.get('height')
        width = options.get('width')

        # Fall back to defaults for the options the client left out
        quality = 100 if quality is None else quality
        tor = False if tor is None else tor
        height = 600 if height is None else height
        width = 800 if width is None else width

        # Validate the request parameters; anything other than 0 is an error object
        checker = Checker(url, full, formatType, quality, tor, timeout, browser, height, width)
        checkerAnswer = checker.verifyAll()
        if checkerAnswer != 0:
            return {'error': checkerAnswer.first, 'error-description': checkerAnswer.second}

        # File name: "mps_<unix timestamp>_<host with '.' and ':' turned into '_'>"
        netloc = urlparse(url).netloc.replace('.', '_').replace(':', '_')
        ts = calendar.timegm(time.gmtime())
        filename = 'mps_{}_{}'.format(ts, netloc)

        screenshot = Screenshot(SCREENSHOT_PATH, FIREFOX_PATH, CHROME_PATH, TOR_PROFILE, TOR_URL)
        answer = screenshot.getImage(full, filename, url, formatType, tor, timeout, browser, height=height, width=width)
        if answer != 0:
            return {'error': answer.first, 'error-description': answer.second}

        mimeType = 'image/{}'.format(formatType)
        image_path = '{}/{}.{}'.format(SCREENSHOT_PATH, filename, formatType)
        return send_file(image_path, mimetype=mimeType)
Example #3
0
def test_assertEquals_02():
    """A bare ``assertEquals(...)`` call (no ``self.``) must yield no errors."""
    source = """
def test_1():
    assertEquals(True, False)
"""
    wrapper = cst.MetadataWrapper(cst.parse_module(source))
    visitor = Checker(Path("(test)"))
    wrapper.visit(visitor)
    assert not visitor.errors
Example #4
0
def test_assertEquals_01():
    """``self.assertEquals(...)`` inside a TestCase must be reported as an error."""
    source = """
class MyTestCase(unittest.TestCase):
    def test_1():
        self.assertEquals(True, False)
"""
    wrapper = cst.MetadataWrapper(cst.parse_module(source))
    visitor = Checker(Path("(test)"))
    wrapper.visit(visitor)
    assert visitor.errors
Example #5
0
def main() -> Optional[int]:
    """Command-line entry point: check and modernize the given Python files.

    Every ``.py`` file among the expanded paths is parsed, visited by a
    ``Checker`` and then by a ``Modernizer``. Unless ``--quiet`` is given, a
    unified diff between the original source and the modernized code is
    printed when they differ.

    :return: 1 if any visitor reported errors (immediately with
        ``--exitfirst``), otherwise None
    """
    parser = argparse.ArgumentParser(description="Test things.")
    parser.add_argument("file", nargs="+")
    parser.add_argument("-v", "--verbose", action="store_true", help="verbose output.")
    parser.add_argument("-q", "--quiet", action="store_true", help="no output.")
    parser.add_argument(
        "-x",
        "--exitfirst",
        action="store_true",
        help="exit instantly on first error or failed test.",
    )
    parser.add_argument("--ignore", nargs="*", help="errors to ignore.")
    options = parser.parse_args()

    candidates = expand_paths([Path(name).expanduser() for name in options.file])
    found_errors = False
    for path in candidates:
        # Only regular *.py files are processed
        is_python_file = not path.is_dir() and path.suffix == ".py"
        if not is_python_file:
            continue
        if options.verbose:
            print(f"Checking {path}")

        original_code = path.read_text()
        wrapper = cst.MetadataWrapper(cst.parse_module(original_code))

        # First pass: report problems
        checker = Checker(path, options.verbose, options.ignore)
        wrapper.visit(checker)
        if checker.errors:
            if options.exitfirst:
                return 1
            found_errors = True

        # Second pass: rewrite the tree
        modernizer = Modernizer(path, options.verbose, options.ignore)
        modified_tree = wrapper.visit(modernizer)
        if modernizer.errors:
            if options.exitfirst:
                return 1
            found_errors = True

        if options.quiet:
            continue
        diff_lines = difflib.unified_diff(
            original_code.splitlines(True),
            modified_tree.code.splitlines(True),
            fromfile=f"a{path}",
            tofile=f"b{path}",
        )
        diff = "".join(diff_lines)
        if diff:
            print(diff)

    return 1 if found_errors else None
Example #6
0
class Detector:
    """ Detector class to detect entities and movies in text """

    # (kwargs key, BIO label) pairs, listed in the order in which
    # parse_entities concatenates the parsed entities.
    _ENTITY_LABELS = (
        ("titles", "TITLE"),
        ("years", "YEAR"),
        ("rate_avg", "RATINGS_AVERAGE"),
        ("trailers", "TRAILER"),
        ("rate", "RATING"),
        ("genres", "GENRE"),
        ("directors", "DIRECTOR"),
        ("actors", "ACTOR"),
        ("characters", "CHARACTER"),
        ("songs", "SONG"),
        ("awards", "AWARD"),
    )

    def __init__(self):
        """ Init the Detector. Will set extractors, NER models, filter, checker and dataframe"""
        # NER Models
        self.model = spacy.load("en_core_web_lg")
        self.ner = AlbertNER(os.path.join(MODELS_PATH, "conll03"))

        # Check data with movie database (rows without actors are dropped)
        df_movies = pd.read_csv(os.path.join(ASSETS_PATH, "movies.csv"))
        df_movies = df_movies.loc[df_movies.actors.notna()]
        self.df_movies = df_movies

        # Extractors
        self.award_extractor = AwardsExtractor(df_movies)
        self.genre_extractor = GenreExtractor(df_movies)
        self.person_extractor = PersonExtractor(df_movies)
        self.rate_extractor = RateExtractor(df_movies)
        self.song_extractor = SongExtractor(df_movies)
        self.title_extractor = TitleExtractor(df_movies)
        self.trailer_extractor = TrailerExtractor(df_movies)
        self.year_extractor = YearExtractor(df_movies)
        # Order in which extract() runs the extractors
        self.extractors = [
            self.award_extractor, self.genre_extractor, self.person_extractor,
            self.rate_extractor, self.song_extractor, self.title_extractor,
            self.trailer_extractor, self.year_extractor
        ]

        # Filter
        self.filter = Filter()
        # Checker
        self.checker = Checker(self.filter, df_movies)

    @staticmethod
    def _normalize_word(word: str) -> str:
        """ Strip surrounding whitespace, then surrounding punctuation, from a word """
        return word.strip().strip(string.punctuation)

    def get_entities(self, **kwargs) -> dict:
        """ Get Named Entities from text. Will take text from kwargs and will update them
        with entities_spacy and entities_albert, extracted from the NER models

        Keyword Arguments:
            :param text: Text to run both NER models on (required)
            :return: (dict) kwargs with 'entities_spacy', a list of
                (text, label) pairs, and 'entities_albert', the result of
                self.ner.extract, added
        """
        doc = self.model(kwargs['text'])
        kwargs['entities_spacy'] = [(ent.text, ent.label_) for ent in doc.ents]
        kwargs['entities_albert'] = self.ner.extract(kwargs['text'])

        return kwargs

    def parse_entity(self, entity_text: str,
                     label: str) -> List[Tuple[str, str]]:
        """ Parse an entity to BIO format

        Keyword Arguments:
            :param entity_text: Entity to parse
            :param label: Label to add to the entity
            :return: (List[Tuple[str, str]]) List of (word, BIO label) pairs;
                the first word gets "B-<label>", the rest "I-<label>"
        """
        first, *rest = entity_text.split(" ")
        return [(first, f"B-{label}")] + [(w, f"I-{label}") for w in rest]

    def merge_entities(
            self, entities: List[Tuple[str, str]],
            new_entities: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
        """ Merge entities to get the whole text labeled

        Every occurrence of a labeled entity's word sequence in the original
        words is overwritten with the labeled (word, label) pairs. Words are
        compared after stripping surrounding whitespace and punctuation.
        ``entities`` is modified in place and also returned.

        Keyword Arguments:
            :param entities: Original entities to get original words from
            :param new_entities: Entities that have been labeled; each one is
                a sequence of (word, label) pairs as built by parse_entity
            :return: The updated ``entities`` list
        """
        original_words = [self._normalize_word(ent_[0]) for ent_ in entities]
        total = len(original_words)
        for ent in new_entities:
            if not ent:
                continue
            words = [self._normalize_word(x[0]) for x in ent]
            span = len(words)
            idxs = []
            # Only start positions where the whole entity fits are tried, so
            # an entity whose first word matches near the end of the text can
            # no longer index past the end of ``entities``.
            for start in range(total - span + 1):
                if original_words[start:start + span] == words:
                    idxs += list(zip(ent, range(start, start + span)))
            for ent_, i in idxs:
                if ent_[1].startswith("I"):
                    # An inside tag is only written when the previous word
                    # already carries the matching B- tag or the same I- tag.
                    previous_label = entities[i - 1][1] if i != 0 else None
                    if previous_label in (ent_[1].replace("I-", "B-"),
                                          ent_[1]):
                        entities[i] = ent_
                else:
                    entities[i] = ent_

        return entities

    def parse_entities(self, **kwargs):
        """ Parse Entities from the text. Will take all entities from kwargs and
        will return the text labeled in BIO format

        Keyword Arguments:
            :param kwargs: Must contain the keys 'titles', 'years', 'rate_avg',
                'trailers', 'rate', 'genres', 'directors', 'actors',
                'characters', 'songs' and 'awards', each an iterable of
                entity strings
            :return: List with one BIO-labeled (word, label) list per entity,
                grouped by key in the order listed above
            :raises KeyError: If any of the keys is missing
        """
        new_entities = []
        for key, label in self._ENTITY_LABELS:
            new_entities += [
                self.parse_entity(value.strip(), label)
                for value in kwargs[key]
            ]
        return new_entities

    def extract(self, text: str) -> Tuple[List[Tuple[str, str]], pd.DataFrame]:
        """ Extract entities from text and return the dataframe of those movies matched.

        Keyword Arguments:
            :param text: Text to extract entities from
            :return: Entities extracted and movies matched. The second element
                is the dataframe only when exactly one movie matched,
                otherwise it is None
        """
        kwargs = {'text': text}
        # Every word starts out labeled "O" (outside any entity)
        entities = [(self._normalize_word(w), "O") for w in text.split(" ")]

        kwargs = self.get_entities(**kwargs)
        for extractor in self.extractors:
            kwargs = extractor.run(**kwargs)
        kwargs, df = self.checker.run(**kwargs)

        single_match = len(df) == 1
        if single_match:
            # With exactly one matching movie, the entity lists are refreshed
            # from that movie's row.
            kwargs['titles'] = self.title_extractor.get_titles_from_df(
                text, df.original_title.values[0])
            kwargs['genres'] = self.genre_extractor.get_genres_from_df(
                text, df.genre.values[0])
            kwargs['actors'] = list(
                set(kwargs['actors'] +
                    self.person_extractor.get_actors_from_df(
                        text, df.actors.values[0])))
            kwargs['directors'] = self.person_extractor.get_directors_from_df(
                text, df.director.values[0])

        new_entities = self.parse_entities(**kwargs)
        entities = self.merge_entities(entities, new_entities)
        return entities, (df if single_match else None)
 def setUp(self):
     """Store a newly constructed ``Checker`` on ``self.temp``."""
     # NOTE(review): presumably a unittest fixture hook run before each test — confirm.
     self.temp = Checker()
Example #8
0
File: bot.py Project: p-gang/Poker
 def set_bot_combination(self, table):
     """Find the combination for this bot's cards and the table cards, and store it.

     :param table: Object whose ``table_cards`` attribute is passed to the checker
     """
     combination = Checker().find_combination(self.cards, table.table_cards)
     self.set_combination(combination)
Example #9
0
def checker():
    """Return a new ``Checker`` constructed with no arguments."""
    return Checker()