def encrypt(seed, sens_priv_num, c_factor, sensor_data):
    data_size = np.size(sensor_data)
    sensing_matrix = Utility.generate_sensing_matrix(
        seed, round((1 - c_factor) * data_size), data_size)
    compressed_data = np.dot(sensing_matrix, sensor_data)
    return compressed_data * Utility.generate_secret_value(
        seed, sens_priv_num)
class Logger(object):
    "Class for logging tweet data"

    def __init__(self, messageFileName="default"):
        self.messageFileName = messageFileName
        self.messageLogger = csv.writer(open(
            self.messageFileName, 'wb'))
        self.messageLogger.writerow(["time", "code", ""])
        self.tweetLogger = csv.writer(open("tweets.csv", 'a'))
        self.util = Utility()
        self.time = self.util.currentTimeSeconds()
        self.timeLogger = csv.writer(open("timing.csv", 'wb'))
        self.timeLogger.writerow(['impl', 'execTime', 'highestEmo',
                                  'currentTime'])

    def logMessage(self, code, message):
        self.time = self.util.currentTimeMillis()
        self.messageLogger.writerow([self.time, code, message])
        print(str(self.time) + "," + str(code) + "," + str(message) + "\n")

    def logTweet(self, tweet):
        self.time = self.util.currentTimeMillis()
        # strip out weird characters that prevent the csv from being written
        tweetText = tweet.text
        cleanText = filter(lambda x: x in string.printable, tweetText)
        exclude = set([',', ';'])
        cleanText = ''.join(ch for ch in cleanText if ch not in exclude)
        # write tweets to the dedicated tweet log (tweets.csv)
        self.tweetLogger.writerow([self.util.currentTimeSeconds(),
                                   tweet.created_at, cleanText,
                                   tweet.lang, tweet.location])

    def logTiming(self, qualifier, execTime, highestEmo):
        self.time = self.util.currentTimeMillis()
        self.timeLogger.writerow([qualifier, execTime, highestEmo, self.time])
def __init__(self, search_rel_question_doc_alg_str="BM25F"):
    if search_rel_question_doc_alg_str == "TF_IDF":
        self.search_alg = TF_IDF
    elif search_rel_question_doc_alg_str == "Frequency":
        self.search_alg = Frequency
    else:
        self.search_alg = BM25F
    Utility.set_stem()
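TF_IDF, Frequency, and BM25F look like the scoring classes from Whoosh's whoosh.scoring module; assuming that is the library in use, a minimal sketch of how the selected search_alg might be applied when querying an index (the index directory and the "content" field name are illustrative, not from the snippet above):

import whoosh.index as index
from whoosh.qparser import QueryParser
from whoosh.scoring import BM25F

# Hypothetical index location and field name; BM25F() stands in for self.search_alg().
ix = index.open_dir("indexdir")
with ix.searcher(weighting=BM25F()) as searcher:
    query = QueryParser("content", ix.schema).parse("example question")
    for hit in searcher.search(query, limit=10):
        print(hit["content"])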
def _on_page(self, page, domain):
    self.logger.info("Searching email address on page")
    clean_html = Utility.clean_html(str(page))
    emails = re.findall(cfg.email_regex, clean_html, re.I)
    if emails:
        emails = map(Utility.normalize_email, emails)
    return emails
def search_email_in_domain(self, domain):
    self.check_driver()
    try:
        self._go_to_page(domain)
        soup = BS(self.driver.page_source, "lxml")

        # Find emails on the page and on linked pages
        _email_founds = []
        for pos in POSSIBLE_POSITION:
            action_list = {
                'on_page': self._on_page,
                'on_link': self._on_link
            }
            email = action_list.get(pos)(soup, domain)
            _email_founds.append(email)
        email_candidates = Utility.flatten_list(_email_founds)

        if str(domain).endswith('.id') or str(domain).endswith('.id/'):
            emails = self.search_id_domain(domain)
            email_candidates += [email for email in emails
                                 if email not in email_candidates]

        if not email_candidates:
            # If no email was found
            self.logger.info('Email not found on domain %s', domain)
            # Find it using whois
            return []
        else:
            # If emails were found, filter them
            final_candidates = self._filter_email_candidates(
                email_candidates)
            return self.sort_email(final_candidates, domain)
    except Exception as exc:
        print "Error on domain {} {} ".format(domain, str(exc))
        return []
def DiffImage(self: Any, source: Dict[str, Any]) -> None:
    """Diff the provided image data source."""

    filename: str = source["filename"]
    url: str = source["url"]
    allowRevert: bool = source.get("allowRevert", True)

    # Append the current timestamp to the end of the URL as an
    # attempt to prevent the Discord CDN from serving previously
    # cached versions of an image.
    timestamp: str = str(int(datetime.utcnow().timestamp()))
    imageUrl: str = f"{url}?{timestamp}"

    older: Dict[str, Any] = source["older"]
    old: Dict[str, Any] = source["old"]
    new: Dict[str, Any] = source["new"]

    if old["raw"] == new["raw"]:
        logger.info(f"No difference found in {filename} ({url})")
        return
    elif (allowRevert is False) and (older["raw"] == new["raw"]):
        logger.info(f"Ignored revert found in {filename} ({url})")
        return

    source["urlTrim"] = Utility.Truncate(self, url, 256)
    old["size"] = Utility.Base64Size(self, old["raw"])
    new["size"] = Utility.Base64Size(self, new["raw"])

    success: bool = SitRep.Notify(
        self,
        {
            "title": source["urlTrim"],
            "description": None,
            "url": url,
            "filename": source["filename"],
            "imageUrl": imageUrl,
            "size": Utility.CountRange(self, new["size"], old["size"]) + " bytes",
            "diffUrl": source["old"]["gist"].html_url + "/revisions",
        },
    )

    # Ensure no changes go without notification
    if success is True:
        Utility.UpdateGist(self, source)
def _find_keyword_in_url(self, links, domain):
    # Get all urls and normalize them
    normalized_links = self._normalize_elems(links)
    # Filter out urls that don't contain a keyword
    candidate_links = \
        filter(lambda x: self._is_contain_keyword(
            x.replace(Utility.find_domain_name(domain), '')),
            normalized_links)
    return candidate_links
def __init__(self, messageFileName="default"):
    self.messageFileName = messageFileName
    self.messageLogger = csv.writer(open(
        self.messageFileName, 'wb'))
    self.messageLogger.writerow(["time", "code", ""])
    self.tweetLogger = csv.writer(open("tweets.csv", 'a'))
    self.util = Utility()
    self.time = self.util.currentTimeSeconds()
def filter_train_dataset(train_df):
    filtered_indices = []
    for question_id in tqdm(train_df.question_id.unique(),
                            total=train_df.question_id.nunique()):
        train_df_part = train_df[train_df.question_id == question_id]
        question = train_df_part.question_lem.values[0]
        sentences = train_df_part.sentence_lem.values
        filtered = Utility.filter_by_question_sentence_words_intersect(question, sentences)
        filtered_indices.extend(np.array(train_df_part.index)[np.array(filtered)])
    train_df_filtered = train_df[~train_df.index.isin(filtered_indices)]
    return train_df_filtered
def sort_email(self, emails, domain):
    # If this is not a governor's domain, drop any email candidate with a .go.id domain name
    if '.go.id' not in domain:
        emails = [email for email in emails if '.go.id' not in email]
    domain_name = Utility.find_domain_name(domain)
    emails = map(lambda email: (email, domain_name), emails)
    # Sort by score, descending
    emails.sort(key=self.email_scoring, reverse=True)
    emails = [x for x, y in emails]
    return emails[:cfg.max_email]
def save():
    db = None
    cursor = None
    try:
        print("User %s logged in!" % request.form['fname'])
        db = mysql.connector.connect(host=host, user=user,
                                     password=password, database=database)
        cursor = db.cursor(buffered=True)
        util = Util()
        status, uid = util.saveUser(db, cursor, request)
        if status and status == 200:
            return render_template("thankyou.html",
                                   fname=request.form['fname'], uid=uid)
        raise Exception("Unable to insert data!")
    except Exception as e:
        print(json.dumps({"error": str(e)}))
        return "<h1>Oops! Something went wrong.. Could you try after sometime or reach out to the host!</h1>"
    finally:
        # Guard against a failed connection before closing resources
        if cursor:
            cursor.close()
        if db:
            db.close()
def _on_link(self, page, domain):
    self.logger.info("Search email address on link to another page")
    _email_founds = []

    # Find all possible link elements
    links = page.findAll('a')
    # Find all candidate links with a keyword in the html text
    keyword_html_link = self._find_keyword_in_html_text(links)
    # Find all candidate links with a keyword in the url
    keyword_url_link = self._find_keyword_in_url(links, domain)
    # Merge the url results, remove duplicate urls
    candidate_links = Utility.uniquify(keyword_html_link + keyword_url_link)

    # Check for invalid urls and try to fix them
    invalid_url = [
        uri for uri in candidate_links if not cfg.url_regex.match(uri)
    ]
    try_fix_invalid_url = map(
        lambda _uri: Utility.normalize_invalid_url(_uri, domain),
        invalid_url)

    # Filter out invalid urls
    candidate_links = candidate_links + try_fix_invalid_url
    candidate_links = Utility.uniquify(
        [_uri for _uri in candidate_links if cfg.url_regex.match(_uri)])

    try:
        for link in candidate_links:
            self.logger.info("Go to next link: " + link)
            try:
                self._go_to_page(link)
            except Exception, err:
                print str(err)
                continue
            soup = BS(self.driver.page_source, "lxml")
            email = self._on_page(soup, domain)
            _email_founds.append(email)
        return _email_founds if not _email_founds else Utility.flatten_list(
            _email_founds)
    except Exception, e:
        logging.error(str(e))
        return _email_founds if not _email_founds else Utility.flatten_list(
            _email_founds)
def create_database(self, df,
                    database_origin_dir='../data/database_origin',
                    database_lem_dir='../data/database_lem'):
    if not os.path.exists(database_origin_dir):
        os.mkdir(database_origin_dir)
    if not os.path.exists(database_lem_dir):
        os.mkdir(database_lem_dir)
    for paragraph in tqdm(df.paragraph.unique(), total=df.paragraph.nunique()):
        paragraph_id = (df[df.paragraph == paragraph].paragraph_id.values[0])
        with open("{}/{}.txt".format(database_origin_dir, paragraph_id), 'w') as fout:
            fout.write(paragraph)
        txt_lemm = Utility.lemmatize(paragraph)
        with open("{}/{}.txt".format(database_lem_dir, paragraph_id), 'w') as fout:
            fout.write(txt_lemm)
def Initialize(self: Any) -> None:
    """Initialize SitRep and begin primary functionality."""

    logger.info("SitRep")
    logger.info("https://github.com/EthanC/SitRep")

    self.config: Dict[str, Any] = SitRep.LoadConfig(self)

    SitRep.SetupLogging(self)

    self.git: Github = Utility.GitLogin(self)

    for source in self.config["dataSources"]:
        SitRep.ProcessDataSource(self, source)

    logger.success("Finished processing data sources")
def main():
    '''Main function of the script'''
    paused = False
    logger = CliLogger()
    screen = Screen()
    resources = Resources()
    analytics = Analytics(logger)
    cooldown = Cooldown(COOLDOWNS)
    analytics.ignore = ANALYTICS_IGNORE
    resources.load(analytics)
    utility = Utility(logger, screen, resources, analytics, cooldown)
    logic = Logic(utility)
    try:
        handle = wait_league_window(logger, (0, 0, 1024, 768))
    except CantForgroundWindowError:
        pass
    logger.log('Press and hold x to exit bot.')
    screen.d3d.capture(target_fps=10, region=find_rect(handle))
    while True:
        try:
            if keyboard.is_pressed('x'):
                raise BotExitException
            if keyboard.is_pressed('ctrl+u'):
                paused = False
            if paused:
                time.sleep(0.1)
                continue
            if keyboard.is_pressed('ctrl+p'):
                paused = True
                logger.log(
                    'Bot paused. Press ctrl+u to unpause. Press x to exit.')
                continue
            logic.tick()
            time.sleep(random.randint(*TICK_INTERVAL) / 1000)
        except BotContinueException as exp:
            time.sleep(random.randint(*exp.tick_interval) / 1000)
        except NoCharacterInMinimap:
            time.sleep(1)
        except BotExitException:
            screen.d3d.stop()
            break
        except Exception:  # pylint:disable=broad-except
            traceback.print_exc()
            screen.d3d.stop()
            break
def _filter_email_candidates(candidates):
    # Remove duplicate elements
    candidates = Utility.uniquify(
        map(lambda email: str(email).strip().lower(),
            [] if not candidates else candidates))
    # Filter out emails that contain a blacklisted word
    candidates = filter(
        lambda email: not re.match(cfg.get_blacklist_regex(), email),
        candidates)
    # Filter out short emails
    candidates = [
        candidate for candidate in candidates if len(candidate) > 5
    ]
    # Filter out emails that contain newlines or whitespace
    candidates = [
        candidate for candidate in candidates
        if '\n' not in candidate and ' ' not in candidate
        and '\t' not in candidate
    ]
    return candidates
def create_train_dataset(errors, data_dir='../notebooks/bm25f',
                         database_dir="../data/database_origin"):
    # polyglot works with the original text to split it into sentences
    df_dict = {}
    for f in tqdm(os.listdir(data_dir)):
        question_id = int(f.split('.')[0])
        if f.endswith('.npy') and question_id not in errors:
            res = {}
            for doc_number, doc_id in enumerate(np.load('{}/{}'.format(data_dir, f))):
                with open("{}/{}.txt".format(database_dir, doc_id)) as fin:
                    res[doc_number] = Utility.sentence_splitter(fin.read())
            df_dict[question_id] = res

    df_with_list_of_docs = pd.DataFrame.from_records(df_dict).T
    df_with_list_of_sentences = pd.DataFrame()
    for col in df_with_list_of_docs.columns:
        df_per_doc = df_with_list_of_docs.apply(
            lambda x: pd.Series(x[col]),
            axis=1).stack().reset_index(level=1, drop=True).to_frame()
        df_per_doc['doc_number'] = col
        df_with_list_of_sentences = pd.DataFrame.append(
            df_with_list_of_sentences, df_per_doc)
    df_with_list_of_sentences = df_with_list_of_sentences.reset_index()
    df_with_list_of_sentences.columns = ['question_id', 'sentence', 'doc_number']
    return df_with_list_of_sentences
"avatar_url": self.config["discord"]["avatarUrl"], "embeds": [ { "title": embed.get("title"), "description": embed.get("description"), "url": embed.get("url"), "timestamp": datetime.utcnow().isoformat(), "color": int("66BB6A", base=16), "footer": { "text": embed.get("filename"), }, "image": {"url": embed.get("imageUrl")}, "author": { "name": "SitRep", "url": "https://github.com/EthanC/SitRep", "icon_url": "https://i.imgur.com/YDZgxh2.png", }, "fields": fields, } ], } return Utility.POST(self, self.config["discord"]["webhookUrl"], payload) if __name__ == "__main__": try: SitRep.Initialize(SitRep) except KeyboardInterrupt: exit()
def decrypt(seed, sensor_priv_num, org_data_len, n_nonzero_coefs, enc_data):
    enc_data /= Utility.generate_secret_value(seed, sensor_priv_num)
    enc_data_size = np.size(enc_data)
    sensing_matrix = Utility.generate_sensing_matrix(seed, enc_data_size,
                                                     org_data_len)
    omp = orthogonal_mp(sensing_matrix, enc_data,
                        n_nonzero_coefs=n_nonzero_coefs)
    return omp
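A minimal round-trip sketch for the encrypt/decrypt pair above, assuming Utility.generate_sensing_matrix and Utility.generate_secret_value behave as the snippets imply (the same seed reproduces the same matrix and secret value); the seed, private number, compression factor, and test signal are made up for illustration, and exact recovery is not guaranteed:

import numpy as np
from sklearn.linear_model import orthogonal_mp  # already used by decrypt()

# Illustrative parameters and a sparse test signal with 3 non-zero coefficients.
seed, priv_num, c_factor = 42, 7, 0.5
data = np.zeros(64)
data[[3, 17, 40]] = [1.5, -2.0, 0.7]

enc = encrypt(seed, priv_num, c_factor, data)        # compress, then mask with the secret value
rec = decrypt(seed, priv_num, np.size(data),
              n_nonzero_coefs=3, enc_data=enc)       # unmask, then recover via OMP

print(np.allclose(rec, data, atol=1e-6))             # recovery is approximate, not guaranteed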
base_dir = str(Path.home())

default_args = {
    'owner': 'user',
    'depends_on_past': False,
    'start_date': dt.datetime.strptime('2018-07-29T00:00:00',
                                       '%Y-%m-%dT%H:%M:%S'),
    'provide_context': True
}

# Instantiate the DAG, scheduled to run every day at midnight
dag = DAG('dag1',
          default_args=default_args,
          schedule_interval='0 0 * * *',
          max_active_runs=1)

util = Utility(news_api_key='', s3_bucket='')


# Get all sources in the English language
def sources(**kwargs):
    # sourcesCsvString = util.getSources('business', 'en', 'in')
    sourcesCsvString = util.getSources(language='en')
    return sourcesCsvString


# Get top headlines for the given list of sources
def headlines(**kwargs):
    ti = kwargs['ti']
    # xcom pull is used to get values from the sources task
    v1 = ti.xcom_pull(task_ids='gettingsources')
    csvFilesList = util.getheadlines(v1)
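The snippet stops before the callables are registered with the DAG. As a sketch only, the two functions would typically be wired up with PythonOperator like this; 'gettingsources' matches the task_ids used in the xcom_pull call above, while 'gettingheadlines' is a hypothetical id for the downstream task:

from airflow.operators.python_operator import PythonOperator

# provide_context=True is already set in default_args, so kwargs (including 'ti') are passed in.
get_sources = PythonOperator(task_id='gettingsources',
                             python_callable=sources,
                             dag=dag)

get_headlines = PythonOperator(task_id='gettingheadlines',  # hypothetical task id
                               python_callable=headlines,
                               dag=dag)

get_sources >> get_headlines  # headlines runs after sources and reads its XCom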
#!/usr/bin/python3
from pathlib import Path
import os
import sys

libPath = os.path.join(Path(__file__).absolute().parent.parent, 'pyuval/')
sys.path.append(libPath)

from config import Config
from utils import Utility

util = Utility()
config = Config()

memoryData = util.readDataFromMemoy(config.get('ksm:gpg:shm:address'))
print(memoryData)
parser.add_argument("-l", "--list", help="list all yubikeys in database", action="store_true", default=False, dest="list") parser.add_argument("-s", "--subsystem", help="Subsystem to apply action [client, yubikey]", default=None, type=str, dest="subsystem") args = parser.parse_args() util = Utility() config = Config() log = PyuvalLogging(ModuleName="ManagePyuval") if 'SUDO_USER' in os.environ: user = os.environ['SUDO_USER'] else: user = os.environ['USER'] ksmDb = Database(**config.get('ksm:db')) if args.subsystem == "yubikey": if args.add: serial = input('Serial: ').strip() username = input('Username: '******'Public ID: ').strip()
def get_base_stats(question, sentences, question_lem, sentences_lem, idfs, idfs_lem):
    (unique_word_count_scores, unique_word_percent_scores, sentence_len,
     bm25f_scores, tf_idf_scores) = Utility.stats(question, sentences, idfs)
    (unique_lem_word_count_scores, unique_lem_word_percent_scores, sentence_lem_len,
     bm25f_lem_scores, tf_idf_lem_scores) = Utility.stats(question_lem, sentences_lem, idfs_lem)

    s = pd.Series([
        unique_word_count_scores,
        unique_lem_word_count_scores,
        unique_word_percent_scores,
        unique_lem_word_percent_scores,
        sentence_len,
        sentence_lem_len,
        bm25f_scores,
        bm25f_lem_scores,
        tf_idf_scores,
        tf_idf_lem_scores,
        sentences,
        sentences_lem,
    ])
    return pd.DataFrame.from_items(zip(s.index, s.values))
def training(args):
    source = EventField(fix_length=args.event_size, embed_size=args.src_embed)
    mask_flag = 'tmpl' in args.net
    sentence_size = args.sentence_size if args.truncate else None
    reverse_decode = args.reverse_decode

    if 'disc' in args.net:
        target = TextAndContentWordField(start_token=None,
                                         fix_length=sentence_size,
                                         mask_player=mask_flag,
                                         mask_team=mask_flag,
                                         numbering=args.numbering,
                                         reverse=reverse_decode,
                                         bpc=args.bpc,
                                         multi_tag=args.multi_tag)
    else:
        target = TextField(start_token=None,
                           fix_length=sentence_size,
                           mask_player=mask_flag,
                           mask_team=mask_flag,
                           numbering=args.numbering,
                           reverse=reverse_decode,
                           bpc=args.bpc,
                           multi_tag=args.multi_tag)

    if args.truncate:
        train = OptaDataset(path=args.dataset + '.train',
                            fields={'source': source, 'target': target})
    else:
        train = OptaDataset(path=args.dataset + '.train',
                            fields={'source': source, 'target': target},
                            limit_length=args.limit)
    source.build_vocabulary(train.source)
    target.build_vocabulary(train.target, size=args.vocab_size)
    target.player_to_id = source.player_to_id
    target.players = source.id_to_player

    if mask_flag or 'disc' in args.net:
        content_word_to_id = getattr(target, 'content_word_to_id', None)
        target_test = TestTextField(source.id_to_player, source.id_to_team,
                                    target.word_to_id, content_word_to_id,
                                    target.unk_id, fix_length=None,
                                    bpc=args.bpc)
    else:
        target_test = TextField(start_token=None, end_token=None,
                                fix_length=None, bpc=args.bpc)
        target_test.word_to_id = target.word_to_id
        target_test.id_to_word = target.id_to_word
        target_test.unk_id = target.unk_id

    dev = OptaDataset(path=args.dataset + '.dev',
                      fields={'source': source, 'target': target_test},
                      limit_length=args.limit)
    train2 = OptaDataset(path=args.dataset + '.train',
                         fields={'source': source, 'target': target_test},
                         limit_length=args.limit)
    test = OptaDataset(path=args.dataset + '.test',
                       fields={'source': source, 'target': target_test})
    test20 = OptaDataset(path=args.dataset + '.test',
                         fields={'source': source, 'target': target_test},
                         limit_length=20)
    test15 = OptaDataset(path=args.dataset + '.test',
                         fields={'source': source, 'target': target_test},
                         limit_length=15)
    test10 = OptaDataset(path=args.dataset + '.test',
                         fields={'source': source, 'target': target_test},
                         limit_length=10)

    start_id, end_id = target.word_to_id['<s>'], target.word_to_id['</s>']
    class_weight = compute_class_weight('./dataset/player_list.txt',
                                        target.word_to_id,
                                        args.class_weight[0],
                                        args.class_weight[1],
                                        gpu=args.gpu)
    dirname = Utility.get_save_directory(
        args.net, './debug' if args.debug else args.output)
    if args.debug:
        save_path = os.path.join('./debug', dirname)
    else:
        save_path = os.path.join(args.output, dirname)
    Utility.make_directory(save_path)

    del args.vocab_size
    setting = {
        'vocab_size': len(target.word_to_id),
        'type_size': len(source.type_to_id),
        'player_size': len(source.player_to_id),
        'team_size': len(source.team_to_id),
        'detail_size': len(source.detail_to_id),
        'detail_dim': source.details_dimention,
        'start_id': start_id,
        'end_id': end_id,
        'unk_id': target.unk_id,
        'save_path': save_path,
        **vars(args)
    }
    dump_setting(setting, os.path.join(save_path, 'setting.yaml'))

    home_player_tag = target.word_to_id.get(target.home_player_tag)
    away_player_tag = target.word_to_id.get(target.away_player_tag)
    home_team_tag = target.word_to_id.get(target.home_team_tag)
    away_team_tag = target.word_to_id.get(target.away_team_tag)
    print('vocab size: {}'.format(len(target.word_to_id)))

    if args.net == 'plain':
        model = MLPEncoder2AttentionDecoder(
            len(source.type_to_id), len(source.player_to_id),
            len(source.team_to_id), len(source.detail_to_id),
            source.details_dimention, args.src_embed, args.event_size,
            len(target.word_to_id), args.trg_embed, args.hidden,
            start_id, end_id, class_weight, args.mlp_layers,
            args.max_length, args.dropout, IGNORE_LABEL,
            reverse_decode=reverse_decode)
    elif args.net == 'tmpl':
        model = MLPEncoder2AttentionDecoder(
            len(source.type_to_id), len(source.player_to_id),
            len(source.team_to_id), len(source.detail_to_id),
            source.details_dimention, args.src_embed, args.event_size,
            len(target.word_to_id), args.trg_embed, args.hidden,
            start_id, end_id, class_weight, args.mlp_layers,
            args.max_length, args.dropout, IGNORE_LABEL,
            source.id_to_player, home_player_tag, away_player_tag,
            source.id_to_team, home_team_tag, away_team_tag,
            target.player_to_id, target.players,
            reverse_decode=reverse_decode)
    elif args.net == 'gate':
        model = MLPEncoder2GatedAttentionDecoder(
            len(source.type_to_id), len(source.player_to_id),
            len(source.team_to_id), len(source.detail_to_id),
            source.details_dimention, args.src_embed, args.event_size,
            len(target.word_to_id), args.trg_embed, args.hidden,
            start_id, end_id, class_weight, args.mlp_layers,
            args.max_length, args.dropout, IGNORE_LABEL,
            reverse_decode=reverse_decode)
    elif args.net == 'gate-tmpl':
        model = MLPEncoder2GatedAttentionDecoder(
            len(source.type_to_id), len(source.player_to_id),
            len(source.team_to_id), len(source.detail_to_id),
            source.details_dimention, args.src_embed, args.event_size,
            len(target.word_to_id), args.trg_embed, args.hidden,
            start_id, end_id, class_weight, args.mlp_layers,
            args.max_length, args.dropout, IGNORE_LABEL,
            source.id_to_player, home_player_tag, away_player_tag,
            source.id_to_team, home_team_tag, away_team_tag,
            target.player_to_id, target.players,
            reverse_decode=reverse_decode)
    elif args.net == 'disc':
        model = DiscriminativeMLPEncoder2AttentionDecoder(
            len(source.type_to_id), len(source.player_to_id),
            len(source.team_to_id), len(source.detail_to_id),
            source.details_dimention, args.src_embed, args.event_size,
            len(target.word_to_id), len(target.content_word_to_id),
            args.trg_embed, args.hidden, start_id, end_id, class_weight,
            args.loss_weight, args.disc_loss, args.loss_func,
            args.mlp_layers, args.max_length, args.dropout, IGNORE_LABEL,
            reverse_decode=reverse_decode)
    elif args.net == 'disc-tmpl':
        model = DiscriminativeMLPEncoder2AttentionDecoder(
            len(source.type_to_id), len(source.player_to_id),
            len(source.team_to_id), len(source.detail_to_id),
            source.details_dimention, args.src_embed, args.event_size,
            len(target.word_to_id), len(target.content_word_to_id),
            args.trg_embed, args.hidden, start_id, end_id, class_weight,
            args.loss_weight, args.disc_loss, args.loss_func,
            args.mlp_layers, args.max_length, args.dropout, IGNORE_LABEL,
            source.id_to_player, home_player_tag, away_player_tag,
            source.id_to_team, home_team_tag, away_team_tag,
            target.player_to_id, target.players,
            reverse_decode=reverse_decode)
    elif args.net == 'gate-disc':
        model = DiscriminativeMLPEncoder2GatedAttentionDecoder(
            len(source.type_to_id), len(source.player_to_id),
            len(source.team_to_id), len(source.detail_to_id),
            source.details_dimention, args.src_embed, args.event_size,
            len(target.word_to_id), len(target.content_word_to_id),
            args.trg_embed, args.hidden, start_id, end_id, class_weight,
            args.loss_weight, args.disc_loss, args.loss_func,
            args.mlp_layers, args.max_length, args.dropout, IGNORE_LABEL,
            reverse_decode=reverse_decode)
    elif args.net == 'gate-disc-tmpl':
        model = DiscriminativeMLPEncoder2GatedAttentionDecoder(
            len(source.type_to_id), len(source.player_to_id),
            len(source.team_to_id), len(source.detail_to_id),
            source.details_dimention, args.src_embed, args.event_size,
            len(target.word_to_id), len(target.content_word_to_id),
            args.trg_embed, args.hidden, start_id, end_id, class_weight,
            args.loss_weight, args.disc_loss, args.loss_func,
            args.mlp_layers, args.max_length, args.dropout, IGNORE_LABEL,
            source.id_to_player, home_player_tag, away_player_tag,
            source.id_to_team, home_team_tag, away_team_tag,
            target.player_to_id, target.players,
            reverse_decode=reverse_decode)
    elif args.net == 'conv-gate-disc-tmpl':
        model = DiscriminativeGLUEncoder2GatedAttentionDecoder(
            len(source.type_to_id), len(source.player_to_id),
            len(source.team_to_id), len(source.detail_to_id),
            source.details_dimention, args.src_embed, args.event_size,
            len(target.word_to_id), len(target.content_word_to_id),
            args.trg_embed, args.hidden, start_id, end_id, class_weight,
            args.loss_weight, args.disc_loss, args.loss_func,
            args.mlp_layers, args.max_length, args.dropout, IGNORE_LABEL,
            source.id_to_player, home_player_tag, away_player_tag,
            source.id_to_team, home_team_tag, away_team_tag,
            target.player_to_id, target.players,
            reverse_decode=reverse_decode)

    model.keyword_ids = [
        target.word_to_id['save'], target.word_to_id['block'],
        target.word_to_id['chance'], target.word_to_id['shot'],
        target.word_to_id['clearance'], target.word_to_id['kick'],
        target.word_to_id['ball'], target.word_to_id['blocked'],
        target.word_to_id['denied']
    ]
    model.id_to_word = target.id_to_word
    if args.numbering:
        model.player_id = target.player_id
        model.team_id = target.team_id

    if args.gpu is not None:
        model.use_gpu(args.gpu)
    opt = optimizers.Adam(args.lr)
    opt.setup(model)
    if args.clipping > 0:
        opt.add_hook(GradientClipping(args.clipping))
    if args.decay > 0:
        opt.add_hook(WeightDecay(args.decay))

    N = len(train.source)
    batch_size = args.batch
    order_provider = OrderProvider(Sampling.get_random_order(N))
    src_train_iter = SequentialIterator(train.source, batch_size,
                                        order_provider, args.event_size,
                                        source.fillvalue, gpu=args.gpu)
    if 'disc' in args.net:
        trg_train_iter = TextAndLabelIterator(train.target, batch_size,
                                              order_provider,
                                              args.sentence_size,
                                              IGNORE_LABEL, gpu=args.gpu)
    else:
        trg_train_iter = SequentialIterator(train.target, batch_size,
                                            order_provider,
                                            args.sentence_size,
                                            IGNORE_LABEL, gpu=args.gpu)
    src_dev_iter = SequentialIterator(dev.source, batch_size, None,
                                      args.event_size, source.fillvalue,
                                      gpu=args.gpu)
    trg_dev_iter = Iterator(dev.target, batch_size,
                            wrapper=EndTokenIdRemoval(end_id), gpu=None)
    src_test_iter = SequentialIterator(test.source, batch_size, None,
                                       args.event_size, source.fillvalue,
                                       gpu=args.gpu)
    src_test20_iter = SequentialIterator(test20.source, batch_size, None,
                                         args.event_size, source.fillvalue,
                                         gpu=args.gpu)
    src_test15_iter = SequentialIterator(test15.source, batch_size, None,
                                         args.event_size, source.fillvalue,
                                         gpu=args.gpu)
    src_test10_iter = SequentialIterator(test10.source, batch_size, None,
                                         args.event_size, source.fillvalue,
                                         gpu=args.gpu)
    src_train2_iter = SequentialIterator(train2.source, batch_size, None,
                                         args.event_size, source.fillvalue,
                                         gpu=args.gpu)
    trg_train2_iter = Iterator(train2.target, batch_size,
                               wrapper=EndTokenIdRemoval(end_id), gpu=None)
    trg_test_iter = Iterator(test.target, batch_size,
                             wrapper=EndTokenIdRemoval(end_id), gpu=None)
    trg_test20_iter = Iterator(test20.target, batch_size,
                               wrapper=EndTokenIdRemoval(end_id), gpu=None)
    trg_test15_iter = Iterator(test15.target, batch_size,
                               wrapper=EndTokenIdRemoval(end_id), gpu=None)
    trg_test10_iter = Iterator(test10.target, batch_size,
                               wrapper=EndTokenIdRemoval(end_id), gpu=None)

    if 'disc' in args.net:
        trainer = Seq2SeqWithLabelTrainer(
            model, opt, src_train_iter, trg_train_iter, src_dev_iter,
            trg_dev_iter, order_provider, evaluate_bleu_and_accuracy,
            args.epoch, save_path, args.eval_step, src_train2_iter,
            trg_train2_iter)
    else:
        trainer = Seq2SeqTrainer(model, opt, src_train_iter, trg_train_iter,
                                 src_dev_iter, trg_dev_iter, order_provider,
                                 evaluate_bleu, args.epoch, save_path,
                                 args.eval_step, src_train2_iter,
                                 trg_train2_iter)

    trainer.run()

    # load best model
    model.load_model(os.path.join(save_path, 'best.model'))
    if 'disc' in args.net:
        bleu_score_dev, _, _ = evaluate_bleu_and_accuracy(
            model, src_dev_iter, trg_dev_iter)
        bleu_score, _, _ = evaluate_bleu_and_accuracy(
            model, src_test_iter, trg_test_iter)
        bleu_score20, _, hypotheses = evaluate_bleu_and_accuracy(
            model, src_test20_iter, trg_test20_iter)
        bleu_score15, _, _ = evaluate_bleu_and_accuracy(
            model, src_test15_iter, trg_test15_iter)
        bleu_score10, _, _ = evaluate_bleu_and_accuracy(
            model, src_test10_iter, trg_test10_iter)
    else:
        bleu_score_dev, _ = evaluate_bleu(model, src_dev_iter, trg_dev_iter)
        bleu_score, _ = evaluate_bleu(model, src_test_iter, trg_test_iter)
        bleu_score20, hypotheses = evaluate_bleu(model, src_test20_iter,
                                                 trg_test20_iter)
        bleu_score15, _ = evaluate_bleu(model, src_test15_iter,
                                        trg_test15_iter)
        bleu_score10, _ = evaluate_bleu(model, src_test10_iter,
                                        trg_test10_iter)
    TextFile(os.path.join(save_path, 'hypotheses.txt'),
             [' '.join(ys) for ys in trainer.hypotheses]).save()
    print('dev score: {}'.format(bleu_score_dev))
    print('test score: {}'.format(bleu_score))
    print('test score20: {}'.format(bleu_score20))
    print('test score15: {}'.format(bleu_score15))
    print('test score10: {}'.format(bleu_score10))

    # saving fields
    pickle_dump(os.path.join(save_path, 'source.pkl'), source)
    pickle_dump(os.path.join(save_path, 'target.pkl'), target)
    pickle_dump(os.path.join(save_path, 'target_test.pkl'), target_test)
def predict_on_test(self):
    """
    This function will load the test dataset, pre-process the test images
    and check the performance of the trained models on unseen data. It will
    also save the confusion matrix and classification report as csv files
    in separate dataframes for each model and for each stage, in the
    evaluation directory.

    Arguments:

        -size_dict : Contains information about the input image sizes for
                     each of the models
        -model_name : Name of the model, for example - vgg16, inception_v3,
                      resnet50 etc.
        -stage_no : The training stage of the model. You will have a choice
                    to select the number of training stages. In stage 1, we
                    only fine-tune the top 2 dense layers by freezing the
                    convolution base. In stage 2, we re-adjust the weights
                    trained in stage 1 by training the top convolution
                    layers, while freezing the dense layers.
    """
    print("\nStarting model evaluation for stage {}..".format(self.stage_no))

    # Create a utility class object to access the class methods
    utils_obj = Utility(self.input_params, self.path_dict)

    df_test = utils_obj.load_data("test")

    test_datagen = ImageDataGenerator(
        preprocessing_function=utils_obj.init_preprocess_func())

    test_generator = test_datagen.flow_from_dataframe(
        dataframe=df_test,
        directory=self.path_dict['source'],
        target_size=utils_obj.init_sizes(),
        x_col="filenames",
        y_col="class_label",
        batch_size=1,
        class_mode='categorical',
        color_mode='rgb',
        shuffle=False)

    nb_test_samples = len(test_generator.classes)
    model = utils_obj.get_models(self.stage_no)
    class_indices = test_generator.class_indices

    def label_class(cat_name):
        return (class_indices[cat_name])

    df_test['true'] = df_test['class_label'].apply(
        lambda x: label_class(str(x)))
    y_true = df_test['true'].values

    # Predictions (probability scores and class labels)
    y_pred_proba = model.predict_generator(test_generator,
                                           nb_test_samples // 1)
    y_pred = np.argmax(y_pred_proba, axis=1)
    df_test['predicted'] = y_pred
    df_test.to_csv(self.path_dict["eval_path"] +
                   "stage{}/".format(self.stage_no) +
                   '{}_predictions_stage_{}.csv'.format(
                       self.input_params['model_name'], self.stage_no))

    dictionary = dict(zip(df_test.true.values, df_test.class_label.values))

    # Confusion matrix
    cm = metrics.confusion_matrix(y_true, y_pred)
    df_cm = pd.DataFrame(cm).transpose()
    df_cm = df_cm.rename(index=dictionary, columns=dictionary,
                         copy=True, inplace=False)
    df_cm.to_csv(self.path_dict["eval_path"] +
                 "stage{}/".format(self.stage_no) +
                 '{}_cm_stage_{}.csv'.format(
                     self.input_params['model_name'], self.stage_no))
    print('Confusion matrix prepared and saved..')

    # Classification report
    report = metrics.classification_report(
        y_true, y_pred,
        target_names=list(class_indices.keys()),
        output_dict=True)
    df_rep = pd.DataFrame(report).transpose()
    df_rep.to_csv(self.path_dict["eval_path"] +
                  "stage{}/".format(self.stage_no) +
                  '{}_class_report_stage_{}.csv'.format(
                      self.input_params['model_name'], self.stage_no))
    print('Classification report prepared and saved..')

    EvalUtils.plot_confusion_matrix(
        self, y_true, y_pred, list(test_generator.class_indices.keys()))

    # General metrics
    df_metrics = EvalUtils.get_metrics(self, y_true, y_pred)
    df_metrics.to_csv(self.path_dict["eval_path"] +
                      "stage{}/".format(self.stage_no) +
                      '{}_metrics_stage_{}.csv'.format(
                          self.input_params['model_name'], self.stage_no))

    history_df = pd.read_csv(
        self.path_dict["model_path"] + "stage{}/".format(self.stage_no) +
        "{}_history_stage_{}.csv".format(self.input_params['model_name'],
                                         self.stage_no))

    # Plot the train vs validation loss for all epochs
    EvalUtils.plt_epoch_error(self, history_df)

    # Generate a complete report and save it as an HTML file in the
    # evaluation folder location
    EvalUtils.get_complete_report(self, y_true, y_pred, class_indices)
This file primarily performs
(a) indexing the input file
(b) persisting the indices locally
(c) using indices to return search results for a given query
"""
import time
import sys
from optparse import OptionParser

import utils.Utility as utility
from core.Indexing import BuiltFileIndex
from core.Searching import SearchIndex

if __name__ == "__main__":
    startTime = time.time()
    print("Starting: {}".format(utility.get_date_time(startTime)))
    try:
        parser = OptionParser()
        parser.add_option("-f", "--file", dest="file")

        # parse the input
        (options, args) = parser.parse_args()

        # get the file to be indexed
        input_file = options.file

        # if not present, throw an exception
        if input_file is None:
            raise Exception("Missing Input File!")
def ProcessDataSource(self: Any, source: Dict[str, Any]) -> None:
    """Prepare to diff the provided data source."""

    source["hash"] = Utility.MD5(self, source["url"])
    source["older"] = {}
    source["old"] = {}
    source["new"] = {}

    older: Dict[str, Any] = source["older"]
    old: Dict[str, Any] = source["old"]
    new: Dict[str, Any] = source["new"]

    format: str = source["contentType"].upper()
    allowRevert: bool = source.get("allowRevert", True)

    if format == "JSON":
        source["ext"] = "json"
        source["filename"] = source["hash"] + "." + source["ext"]

        old["gist"] = Utility.GetGist(self, source["filename"])
        new["raw"] = Utility.FormatJSON(self, Utility.GET(self, source["url"]))

        if old["gist"] is False:
            return
        elif (new["raw"] is not None) and (old["gist"] is not None):
            if allowRevert is False:
                older["raw"] = Utility.FormatJSON(
                    self,
                    Utility.GetGistRaw(self, old["gist"], source["filename"], 1),
                )

            old["raw"] = Utility.FormatJSON(
                self, Utility.GetGistRaw(self, old["gist"], source["filename"])
            )

            SitRep.DiffJSON(self, source)
        elif (new["raw"] is not None) and (old["gist"] is None):
            Utility.CreateGist(self, source)
    elif format == "IMAGE":
        source["ext"] = "txt"
        source["filename"] = source["hash"] + "." + source["ext"]

        old["gist"] = Utility.GetGist(self, source["filename"])
        new["raw"] = Utility.Base64(
            self, Utility.GET(self, source["url"], raw=True)
        )

        if old["gist"] is False:
            return
        elif (new["raw"] is not None) and (old["gist"] is not None):
            if allowRevert is False:
                older["raw"] = Utility.GetGistRaw(
                    self, old["gist"], source["filename"], 1
                )

            old["raw"] = Utility.GetGistRaw(self, old["gist"], source["filename"])

            SitRep.DiffImage(self, source)
        elif (new["raw"] is not None) and (old["gist"] is None):
            Utility.CreateGist(self, source)
    elif format == "TEXT":
        source["ext"] = source.get("fileType", "txt")
        source["filename"] = source["hash"] + "." + source["ext"]

        old["gist"] = Utility.GetGist(self, source["filename"])
        new["raw"] = Utility.GET(self, source["url"])

        if old["gist"] is False:
            return
        elif (new["raw"] is not None) and (old["gist"] is not None):
            if allowRevert is False:
                older["raw"] = Utility.GetGistRaw(
                    self, old["gist"], source["filename"], 1
                )

            old["raw"] = Utility.GetGistRaw(self, old["gist"], source["filename"])

            SitRep.DiffText(self, source)
        elif (new["raw"] is not None) and (old["gist"] is None):
            Utility.CreateGist(self, source)
    else:
        logger.error(f"Data source with content type {format} is not supported")
        logger.debug(source)
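ProcessDataSource reads the url, contentType, allowRevert, and fileType keys from each entry in config["dataSources"] (iterated in Initialize above). A hypothetical dataSources value, sketched as Python literals since the actual config file layout is not shown in these snippets; the URLs are placeholders:

# Hypothetical dataSources entries; only the keys read by ProcessDataSource are shown.
dataSources = [
    {"url": "https://example.com/status.json", "contentType": "json"},
    {"url": "https://example.com/banner.png", "contentType": "image", "allowRevert": False},
    {"url": "https://example.com/notes.md", "contentType": "text", "fileType": "md"},
]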
    try:
        for link in candidate_links:
            self.logger.info("Go to next link: " + link)
            try:
                self._go_to_page(link)
            except Exception, err:
                print str(err)
                continue
            soup = BS(self.driver.page_source, "lxml")
            email = self._on_page(soup, domain)
            _email_founds.append(email)
        return _email_founds if not _email_founds else Utility.flatten_list(
            _email_founds)
    except Exception, e:
        logging.error(str(e))
        return _email_founds if not _email_founds else Utility.flatten_list(
            _email_founds)

def sort_email(self, emails, domain):
    # If this is not a governor's domain, drop any email candidate with a .go.id domain name
    if '.go.id' not in domain:
        emails = [email for email in emails if '.go.id' not in email]
    domain_name = Utility.find_domain_name(domain)
    emails = map(lambda email: (email, domain_name), emails)
    # Sort by score, descending
    emails.sort(key=self.email_scoring, reverse=True)
    emails = [x for x, y in emails]
    return emails[:cfg.max_email]

@staticmethod
def email_scoring(email_payload):
def DiffText(self: Any, source: Dict[str, Any]) -> None:
    """Diff the provided text data source."""

    filename: str = source["filename"]
    url: str = source["url"]
    allowRevert: bool = source.get("allowRevert", True)

    older: Dict[str, Any] = source["older"]
    old: Dict[str, Any] = source["old"]
    new: Dict[str, Any] = source["new"]

    if allowRevert is False:
        older["hash"] = Utility.MD5(self, older["raw"])

    old["hash"] = Utility.MD5(self, old["raw"])
    new["hash"] = Utility.MD5(self, new["raw"])

    if old["hash"] == new["hash"]:
        logger.info(f"No difference found in {filename} ({url})")
        return
    elif (allowRevert is False) and (older["hash"] == new["hash"]):
        logger.info(f"Ignored revert found in {filename} ({url})")
        return

    diff: Iterator[str] = Differ().compare(
        old["raw"].splitlines(), new["raw"].splitlines()
    )

    desc: str = ""
    additions: int = 0
    deletions: int = 0

    for line in diff:
        if line.startswith("+ "):
            additions += 1
            desc += f"{line}\n"
        elif line.startswith("- "):
            deletions += 1
            desc += f"{line}\n"

    desc = Utility.Truncate(self, desc, 4048, split="\n")

    source["urlTrim"] = Utility.Truncate(self, url, 256)

    success: bool = SitRep.Notify(
        self,
        {
            "title": source["urlTrim"],
            "description": f"```diff\n{desc}```",
            "url": url,
            "filename": source["filename"],
            "additions": f"{additions:,}",
            "deletions": f"{deletions:,}",
            "diffUrl": source["old"]["gist"].html_url + "/revisions",
        },
    )

    # Ensure no changes go without notification
    if success is True:
        Utility.UpdateGist(self, source)
def _is_contain_domain(self, domain, email):
    email_domain = Utility.find_email_domain(email)
    return email_domain in domain if email_domain else False
def train_stage1(self):
    """
    In this stage, we will freeze all the convolution blocks and train only
    the newly added dense layers. We will add a global spatial average
    pooling layer and fully connected dense layers on top of the base
    model's output. We will freeze the convolution base and train only the
    top layers: all the convolution layers are set to non-trainable, and
    the model should be compiled after they have been frozen.

    Arguments:

        -input_params : This parameter will contain all the information
                        that the user will input through the terminal
    """
    print("\nTraining the model by freezing the convolution block and tuning the top layers...")
    st = dt.now()

    utils_obj = Utility(self.input_params, self.path_dict)

    # If model_name != 'custom', load a pre-trained ImageNet base;
    # otherwise use the user-supplied custom model.
    if (self.input_params['model_name'] != 'custom'):
        base_model = utils_obj.load_imagenet_model()

        # Adding a global spatial average pooling layer
        x = base_model.output
        x = GlobalAveragePooling2D()(x)

        # Adding a fully-connected dense layer
        # x = Dense(self.input_params['dense_neurons'], activation='relu', kernel_initializer='he_normal')(x)

        # Adding the custom layers
        customlayers = self.input_params['customlayers']
        x = customlayers(x)

        # Adding the final dense output layer
        n = utils_obj.no_of_classes()
        output_layer = Dense(
            n,
            activation=self.input_params['outputlayer_activation'],
            kernel_initializer='glorot_uniform')(x)

        # Define the model
        model_stg1 = Model(inputs=base_model.input, outputs=output_layer)

        # Here we freeze the convolution base and train only the top layers.
        # All convolution layers are set to non-trainable; the model should
        # be compiled after this is done.
        for layer in base_model.layers:
            layer.trainable = False
    else:
        model_stg1 = self.input_params['custom_model']

    # Compiling the model
    model_stg1.compile(
        optimizer=optimizers.Adam(lr=self.input_params['stage1_lr']),
        loss='categorical_crossentropy',
        metrics=[self.input_params['metric']])

    # Normalize the images
    train_datagen = ImageDataGenerator(
        preprocessing_function=utils_obj.init_preprocess_func())
    val_datagen = ImageDataGenerator(
        preprocessing_function=utils_obj.init_preprocess_func())

    df_train = utils_obj.load_data("train")
    df_val = utils_obj.load_data("val")

    train_generator = train_datagen.flow_from_dataframe(
        dataframe=df_train,
        directory=self.path_dict['source'],
        target_size=utils_obj.init_sizes(),
        x_col="filenames",
        y_col="class_label",
        batch_size=self.input_params['batch_size'],
        class_mode='categorical',
        color_mode='rgb',
        shuffle=True)

    val_generator = val_datagen.flow_from_dataframe(
        dataframe=df_val,
        directory=self.path_dict['source'],
        target_size=utils_obj.init_sizes(),
        x_col="filenames",
        y_col="class_label",
        batch_size=self.input_params['batch_size'],
        class_mode='categorical',
        color_mode='rgb',
        shuffle=True)

    nb_train_samples = len(train_generator.classes)
    nb_val_samples = len(val_generator.classes)

    history = model_stg1.fit_generator(
        generator=train_generator,
        steps_per_epoch=nb_train_samples // self.input_params['batch_size'],
        epochs=self.input_params['epochs1'],
        validation_data=val_generator,
        validation_steps=nb_val_samples // self.input_params['batch_size'],
        callbacks=TrainingUtils.callbacks_list(self, 1),  # 1 for stage 1
        workers=self.input_params['nworkers'],
        use_multiprocessing=False,
        max_queue_size=20)

    hist_df = pd.DataFrame(history.history)
    hist_csv_file = (self.path_dict['model_path'] + "stage{}/".format(1) +
                     "{}_history_stage_{}.csv".format(
                         self.input_params['model_name'], 1))
    with open(hist_csv_file, mode='w') as file:
        hist_df.to_csv(file, index=None)

    # model_stg1.load_weights(self.path_dict['model_path'] + "stage{}/".format(1) + "{}_weights_stage_{}.hdf5".format(self.input_params['model_name'], 1))
    model_stg1.save(
        self.path_dict['model_path'] + "stage{}/".format(1) +
        "{}_model_stage_{}.h5".format(self.input_params['model_name'], 1))

    TrainingUtils.save_summary(self, model_stg1, 1)
    TrainingUtils.plot_layer_arch(self, model_stg1, 1)

    stage1_params = dict()
    stage1_params['train_generator'] = train_generator
    stage1_params['val_generator'] = val_generator
    stage1_params['nb_train_samples'] = nb_train_samples
    stage1_params['nb_val_samples'] = nb_val_samples

    print("\nTime taken to train the model in stage 1: ", dt.now() - st)

    # Start model evaluation for stage 1
    eval_utils = EvalUtils(self.input_params, self.path_dict, 1)
    eval_utils.predict_on_test()

    return model_stg1, stage1_params
    indent thrice
    * Bullet point 1
    * Bullet point 2
    * Bullet point 3
    1. item 1
    2. item 2
    3. item 3
    1.item 3-1
"""
doc.set_section_text(text=text_1)

doc.create_section(2, 'Section 1.1')
text_2 = """
This text is centred
test 12345
'''test 6789'''
"""
doc.set_section_text(2, text_2, True)

doc.create_section(1, 'Section 2')
doc.delete_section(3)

# WRITE FILES
util = Utility()
doc_txt = doc.__str__()
doc_xml = util.convert_to_xml(doc.get_wiki())
util.write_file(contents=doc_txt)
util.write_file(contents=doc_xml, file_type='xml', xsd_schema='wiki')

print('SCRIPT COMPLETED')