Example #1
def predict(cfg, net, model_path):
    mode = cfg['mode']
    device = cfg['device']
    class_num = cfg['class_num']
    batch_size = cfg['batch_size']
    num_workers = cfg['num_workers']
    dataset_path = cfg['dataset_path']

    model_name = net.__class__.__name__
    if len(cfg['gpu_ids']) > 1:
        model_name = net.module.__class__.__name__

    # output_dir = os.path.join(cfg['output_dir'], model_name, splitext(split(model_path)[1])[0])
    
    # if os.path.exists(output_dir):
    #     shutil.rmtree(output_dir)
    # os.makedirs(output_dir,exist_ok=True)

    current_time = datetime.datetime.now()
    logger_file = os.path.join('log', mode, '{} {}.log'.
                    format(model_name, current_time.strftime('%Y-%m-%d %H-%M-%S')))
    logger = setup_logger(f'{model_name} {mode}',logger_file)

    dataset = SelfDataset(os.path.join(dataset_path, 'data_test.npy'), os.path.join(dataset_path, 'label_test.npy'), logger)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    net.load_state_dict(torch.load(model_path, map_location=device))

    net.eval()
    
    test_loss, acc, f1, corr = eval(cfg, net, loader, device, 'Test')

    logger.info('Test Mean accuracy: {:.6f} F-Score: {:.6f} Correlation Coefficient: {:.6f}'.format(acc, f1, corr))
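
# The setup_logger helper itself is not shown on this page, and each project below defines
# its own variant. A minimal sketch matching the (name, log_file, level) form used above;
# treat it as an illustration, not the project's actual implementation:
import logging
import os

def setup_logger(name, log_file, level=logging.INFO):
    # the examples write into nested paths such as log/<mode>/..., so create the directory
    os.makedirs(os.path.dirname(log_file), exist_ok=True)
    handler = logging.FileHandler(log_file)
    handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
    logger = logging.getLogger(name)
    logger.setLevel(level)
    logger.addHandler(handler)
    return logger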
           
def frame_and_publish(topic, key, event):
    # logger
    pub_logger = setup_logger('pub_log', 'logs/publisher_log.log', logging.DEBUG)
    pub_logger.info("New publishment starts")

    producer = connect_kafka_producer()
    print('Trying to open camera')
    vidcap = cv2.VideoCapture(0)
    print('Opened camera by index 0 \n\n')
    success, image = vidcap.read()
    count = 0
    while success:
        # capture frame
        if event.is_set():
            break
        success, image = vidcap.read()
        #publish to kafka topic
        try:
            key_bytes = bytes(key, encoding='utf-8')
            ret, buffer = cv2.imencode('.jpg', image)
            producer.send(topic, key=key_bytes, value=buffer.tobytes())
            producer.flush()
            pub_logger.debug('Image {} has been published successfully'.format(count))
            count += 1
        except Exception as ex:
            pub_logger.debug('Exception in publishing message')
            pub_logger.debug(str(ex))

    # release the camera and close the publisher
    vidcap.release()
    if producer is not None:
        producer.close()
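
# connect_kafka_producer() is defined elsewhere in that project; a plausible sketch using
# kafka-python with the same broker settings as the consumers below (an assumption):
from kafka import KafkaProducer

def connect_kafka_producer():
    producer = None
    try:
        producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
                                 api_version=(0, 10))
    except Exception as ex:
        print('Exception while connecting Kafka')
        print(str(ex))
    return producer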
    def __init__(self, chatID, raw_message):

        #Instantiate the command library from the parent class with Super!
        self.commands_dict = super(ActiveConversation, self).get_commands_dict()
        # Instantiate properties with the ChatId and Message
        self.chatID = chatID
        self.active = True
        self.ActualMessage = raw_message
        #Instantiate phase indicator and get Unique id
        self.conversation_phase = 0 #For multiple-phase conversations
        self.uniqueID = uuid.uuid4().get_hex()
        #Set conversation logger.
        newlogger = tools.setup_logger(self.uniqueID,os.path.dirname(os.path.realpath(__file__))
                                       +'/logs/'+str(self.chatID)+'.log')
        self.logger = logging.getLogger(self.uniqueID)
        #Set error counter
        self.errorcounter = 0

        #Classify the creation command. Do we need the chat engine, or do we know the message command?

        if self.commandsQ(raw_message):

            self.function = self.AssignCommand(raw_message)
            self.logger.info('Conversation marked as command.')
            self.function_type = 'BotCommand'

        else:

            self.function = None
            self.function_type = 'ChatEngine'
            self.logger.info('Conversation marked as chat.')


        self.cache = []
def consume_save(display_topic, offset, db_info, event):
    # logger
    sav_logger = setup_logger('sav_log', 'logs/saver_log.log', logging.DEBUG)
    sav_logger.info("New saving starts")

    # init db connection
    db = pymysql.connect(host=db_info['host'], user=db_info['user'],
                         password=db_info['passwd'], database=db_info['db'])
    cursor = db.cursor()
    # consume predicted sentiment
    consumer = KafkaConsumer(display_topic,
                             auto_offset_reset=offset,
                             bootstrap_servers=['localhost:9092'],
                             api_version=(0, 10),
                             consumer_timeout_ms=1000)
    # consume one by one and save the result to db
    flag = True
    num_trials = 5
    while flag:
        if event.is_set():
            break
        for pred in consumer:
            if event.is_set():
                flag = False
                break
            pred = pred.value
            try:
                pred = int(pred.decode("utf-8"))
                # save sentiment
                query = "INSERT INTO PRED (pred) VALUES ({})".format(pred)
                cursor.execute(query)
                db.commit()
                sav_logger.debug('Successfully saved sentiment {}'.format(pred))
            except Exception as ex:
                sav_logger.debug('Failed to save sentiment')
                sav_logger.debug(str(ex))
                db.rollback()
        num_trials -= 1
        if num_trials == 0:
            sav_logger.debug('Consumer has been idling for 5 times')
            break
    consumer.close()
    db.close()
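
# Design note: the INSERT above interpolates the value with str.format; PyMySQL also
# supports parameterized queries, which is the safer equivalent. A minimal sketch against
# the same (hypothetical) PRED table:
def save_sentiment(db, cursor, pred):
    cursor.execute("INSERT INTO PRED (pred) VALUES (%s)", (int(pred),))
    db.commit()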
Example #5
def train(cfg, net):
    lr = cfg['lr']
    mode = cfg['mode']
    device = cfg['device']
    weight = cfg['weight']
    batch_size = cfg['batch_size']
    num_workers = cfg['num_workers']
    dataset_path = cfg['dataset_path']
    model_name = net.__class__.__name__

    current_time = datetime.datetime.now()
    logger_file = os.path.join('log', mode, '{} {} lr {} bs {} ep {}.log'.
                    format(model_name,current_time.strftime('%Y-%m-%d %H-%M-%S'),
                            cfg['lr'], cfg['batch_size'], cfg['epochs']))
    logger = setup_logger(f'{model_name} {mode}',logger_file)

    writer = SummaryWriter(log_dir=os.path.join('runs', model_name))

    dataset = SelfDataset(os.path.join(dataset_path, 'data_train.npy'), os.path.join(dataset_path, 'label_train.npy'), logger)
    loader = DataLoader(dataset)
    n_val = int(len(dataset) * cfg['valid_percent'] / 100)
    n_train = len(dataset) - n_val
    train_dataset, val_dataset = random_split(dataset, [n_train, n_val])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True, drop_last=False)

    data, label = next(iter(train_loader))
    data = data.to(device)
    label = label.to(device)
    writer.add_graph(net, data)

    display_weight = weight
    weight = torch.tensor(weight)
    if device == 'cuda':
        weight = weight.cuda()

    criterion = CrossEntropyLoss(weight=weight)
    
    optimizer = Adam(net.parameters(), lr=cfg['lr'], betas=(0.9,0.999))
    # optimizer = SGD(net.parameters(), lr=cfg['lr'], momentum=0.9)

    logger.info(f'''Starting training:
        Model:           {net.__class__.__name__}
        Epochs:          {cfg['epochs']}
        Batch size:      {cfg['batch_size']}
        Learning rate:   {cfg['lr']}
        Training size:   {n_train}
        Weight:          {display_weight}
        Validation size: {n_val}
        Device:          {device}
    ''')

    iter_num = 0
    train_batch_num = len(train_dataset) // batch_size
    max_acc = 0
    max_f1 = 0
    max_corr = 0
    best_epoch = 0
    for epoch in range(1, cfg['epochs'] + 1):
        net.train()
        epoch_loss = []
        logger.info('epoch[{}/{}]'.format(epoch,cfg['epochs']))
        with tqdm(total=n_train, desc='Epoch {}/{}'.format(epoch,cfg['epochs']),unit='items') as pbar:
            # for iter, (data, label) in enumerate(train_loader):
            for data, label in train_loader:
                data = data.to(device)
                label = label.to(device)

                predict = net(data)

                loss = criterion(predict, torch.argmax(label, dim=1)) # CrossEntropy
                # loss = criterion(predict, label) # MSE

                loss_item = loss.item()
                epoch_loss.append(loss_item)
                pbar.set_postfix(**{'loss (batch)':loss_item})
                
                optimizer.zero_grad()
                loss.backward()
                # nn.utils.clip_grad_value_(net.parameters(), 0.1) 
                optimizer.step()

                pbar.update(data.shape[0])
                iter_num += 1
                if iter_num % max(1, train_batch_num // 10) == 0:
                    # acc, f1, corr = metric(torch.argmax(label, dim=1).data.cpu().numpy(), torch.argmax(predict, dim=1).data.cpu().numpy())
                    with torch.no_grad():
                        acc, f1, corr = metric(label.data.cpu().numpy(), F.softmax(predict, dim=1).data.cpu().numpy())
                    logger.info('Training Loss: {:.6f} Mean accuracy: {:.6f} F-Score: {:.6f} Correlation Coefficient: {:.6f}'.format(loss_item, acc, f1, corr))
                    writer.add_scalar('Training Loss', loss_item, iter_num)
                    writer.add_scalar('Training Mean accuracy', acc, iter_num)
                    writer.add_scalar('Training F-Score', f1, iter_num)
                    writer.add_scalar('Training Correlation Coefficient', corr, iter_num)

        net.eval()
        val_loss, acc, f1, corr = eval(cfg, net, val_loader, device, 'Validation')
        logger.info('Validation Loss: {:.6f} Mean accuracy: {:.6f} F-Score: {:.6f} Correlation Coefficient: {:.6f}'.format(val_loss, acc, f1, corr))
        # writer.add_scalar('Validation Loss', val_loss, epoch)
        writer.add_scalar('Validation Mean accuracy', acc, epoch)
        writer.add_scalar('Validation F-Score', f1, epoch)
        writer.add_scalar('Validation Correlation Coefficient', corr, epoch)

        if (acc > max_acc or (acc == max_acc and f1 > max_f1) or (acc == max_acc and f1 == max_f1 and corr > max_corr)) and (epoch > (cfg['epochs'] >> 2)):
            max_acc = acc
            max_f1 = f1
            max_corr = corr
            best_epoch = epoch
            if not os.path.exists(cfg['checkpoint_dir']):
                os.makedirs(cfg['checkpoint_dir'])
                logger.info('Created checkpoint directory:{}'.format(cfg['checkpoint_dir']))
            torch.save(net.state_dict(),os.path.join(cfg['checkpoint_dir'],"{}.pth".format(cfg['model'])))
            logger.info(f'Checkpoint {epoch} saved!')

    logger.info('Best epoch: {}/{} Mean accuracy: {:.6f} F-Score: {:.6f} Correlation Coefficient: {:.6f}'.format(best_epoch, cfg['epochs'], max_acc, max_f1, max_corr))
    writer.close()
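
# metric() is not shown on this page; a minimal sketch of what it plausibly computes
# (mean accuracy, macro F-score, Pearson correlation), assuming one-hot labels and
# softmax probabilities as passed above:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

def metric(label_onehot, prob):
    y_true = np.argmax(label_onehot, axis=1)
    y_pred = np.argmax(prob, axis=1)
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='macro')
    corr = np.corrcoef(y_true, y_pred)[0, 1]
    return acc, f1, corr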
# -*- coding: UTF-8 -*-

from bs4 import BeautifulSoup
import copy
from datetime import datetime
import os
from pymongo import MongoClient
import re
import sys
from time import sleep
import urllib2

from tools import renew_ip, headers, setup_logger, exception_hook


logger = setup_logger('b92statistike-scrape_comments.log')

re_date_comment_extract = re.compile(r'([0-9]+).\s(.*)\s(201[456])\s([0-9]+):([0-9]+)')

def convert_month(monthstr):
    """
    Converts month as string to something datetime module understands.
    TODO: this sucks, dictionary is better
    """
    if monthstr == 'januar':
        return 1
    elif monthstr == 'februar':
        return 2
    elif monthstr == 'mart':
        return 3
    elif monthstr == 'april':
        return 4
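
# The docstring's TODO asks for a dictionary lookup instead of the if/elif chain; a minimal
# sketch of that alternative (the month names after 'april' are assumed, following the same
# Serbian spelling):
MONTHS = {'januar': 1, 'februar': 2, 'mart': 3, 'april': 4, 'maj': 5, 'jun': 6,
          'jul': 7, 'avgust': 8, 'septembar': 9, 'oktobar': 10, 'novembar': 11, 'decembar': 12}

def convert_month_dict(monthstr):
    return MONTHS[monthstr]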
Example #7
def train_net():
    # preparing
    # args = Sina_News_Config()
    args = Ren_CECps_Config()
    processor = Processor(args)
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    time_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    save_dir = join(args.output_dir, time_stamp)
    os.makedirs(save_dir)
    backup_code(os.path.abspath(os.path.curdir),
                os.path.join(save_dir, "code"))
    # set up logger
    logger = setup_logger(save_path=join(save_dir, "logger.txt"))
    _print = logger.info

    tokenizer = BertTokenizer.from_pretrained(os.path.join(
        args.output_dir, args.pretrained_model_name,
        args.pretrained_model_name + "-vocab.txt"),
                                              do_lower_case=False)

    train_examples = processor.get_train_examples()
    num_train_optimization_steps = int(
        len(train_examples) / args.train_batch_size /
        args.gradient_accumulation_steps) * args.num_train_epochs

    # Prepare model
    model = ClassificationModel(
        os.path.join(args.output_dir, args.pretrained_model_name),
        args.num_labels, args.dropout_prob, args.hidden_size)
    model.to(device)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        _print("Using multi GPUs!")

    # Prepare optimizer
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=args.learning_rate,
                         warmup=args.warmup_proportion,
                         t_total=num_train_optimization_steps)
    # Prepare dataloader
    train_dataloader = make_dataloder(train_examples,
                                      args.max_seq_length,
                                      tokenizer,
                                      args.train_batch_size,
                                      mode="train")
    test_examples = processor.get_test_examples()
    with open("train.examples", "wb") as f:
        pickle.dump(train_examples, f)
    with open("test.examples", "wb") as f:
        pickle.dump(test_examples, f)
    test_dataloader = make_dataloder(test_examples,
                                     args.max_seq_length,
                                     tokenizer,
                                     args.train_batch_size,
                                     mode="test")
    for epoch in trange(int(args.num_train_epochs), desc="Epoch", leave=True):
        # tr_loss = 0
        nb_tr_examples, nb_tr_steps = 0, 0
        model.train()
        for step, batch in enumerate(tqdm(train_dataloader,
                                          desc="Training...")):
            batch = tuple(t.to(device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids, label_scores = batch
            loss1, loss2 = model(input_ids, segment_ids, input_mask, label_ids,
                                 label_scores)
            # tr_loss += loss1.mean().item() + loss2.mean().item()
            (loss1 + loss2).mean(dim=0).sum().backward()
            nb_tr_examples += input_ids.size(0)
            nb_tr_steps += 1
            if (step + 1) % args.gradient_accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()

        # do evaluation
        model.eval()
        clf_pred, clf_label, rk_pred, rk_label = [], [], [], []
        for step, batch in enumerate(
                tqdm(test_dataloader, desc="Evaluating...")):
            batch = tuple(t.to(device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids, label_scores = batch
            with torch.no_grad():
                pred_logits, pred_scores = model(input_ids, segment_ids,
                                                 input_mask)
            pred_logits = pred_logits.ge(0).float()
            clf_pred.append(pred_logits.cpu().numpy())
            clf_label.append(label_ids.cpu().numpy())
            rk_pred.append(pred_scores.cpu().numpy())
            rk_label.append(label_scores.cpu().numpy())
        clf_pred, clf_label, rk_pred, rk_label = map(
            lambda x: np.vstack(x), (clf_pred, clf_label, rk_pred, rk_label))
        clf_result = evaluate_classification(clf_pred, clf_label)
        rk_result = evaluate_ranking(rk_pred, rk_label, clf_label)
        tb = pt.PrettyTable(field_names=list(clf_result.keys()) +
                            list(rk_result.keys()))
        tb.add_row(list(clf_result.values()) + list(rk_result.values()))
        tb.float_format = "0.4"
        output_str = "\n" + str(tb)
        tqdm.write(output_str)
        _print(output_str)
        torch.save(model.state_dict(),
                   join(save_dir, "model_{}.pt".format(epoch)))
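
# Note on the accumulation loop above: when gradients are accumulated over several
# mini-batches, the per-batch loss is usually divided by the accumulation count so the
# effective gradient matches one large batch. A self-contained sketch of that common
# pattern (hypothetical model/loader/criterion; not necessarily what this project intends):
def train_with_accumulation(model, loader, optimizer, criterion, accumulation_steps=4):
    model.train()
    optimizer.zero_grad()
    for step, (x, y) in enumerate(loader):
        loss = criterion(model(x), y) / accumulation_steps  # scale so gradients average out
        loss.backward()
        if (step + 1) % accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()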
Example #8
import logging
from analyzer import *
import tools
import compiler

logger = tools.setup_logger('vm_logger', 'vm.log', logging.DEBUG)

from compiler import *


"""
Keep it simple
"""


MEM_SIZE=256        # Size of memory block
PROTECTED_SIZE=20   # Protected memory cannot be modified using code, just by mutation
GEN_REGS=8          # Number of general purpose registers
CODE_PCTG=0.25      # Percentage of memory extra-allocated to the Code Segment
STEPS_CYCLE=50      # Max number of steps in a run to avoid infinite loops or never-ending programs


# Description of protected memory cells (only for the initial VM)
# These cells are the first positions of memory
# The offspring VM is created from the values in memory
PROTECTED=[
    ('ZERO',0),
    ('MEMSIZE',MEM_SIZE),
    ('PROTECTED',PROTECTED_SIZE),
    ('GENREGS',GEN_REGS),
    ('CODEPCTG',CODE_PCTG),
from bs4 import BeautifulSoup
from datetime import timedelta
from pymongo import MongoClient
import re
import sys
from time import sleep
from urllib2 import HTTPError
import urllib2

from tools import renew_ip, headers, setup_logger, exception_hook
import os


logger = setup_logger('b92statistike-scrape_news.log')

re_changing_publishing_time = re.compile(r'.*\|\s+([0-9][0-9]):([0-9][0-9])\s+->.*')

def _append_one_news_text(db, news_id, news_link, news_date_published):
    """
    Reads the text for one news item and updates it in Mongo.
    It also updates the news item's first date of publish (we don't have that from the metadata fetch).
    There is simple retry logic if we get any HTTP error. All pages are
    saved locally and we use those first if they exist.
    """
    if os.path.exists('dumps/news/n%s.html' % news_id):
        logger.info('Found cached file %s, reusing it', news_id)
        with open('dumps/news/n%s.html' % news_id, 'r') as f:
            html_content = f.read()
    else:
        if news_link.startswith('?'):
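
# The else-branch above is cut off on this page; the docstring describes fetching with
# simple retries plus a local cache. A sketch of that retry pattern using the urllib2
# imports from this file (the helper name and exact URL handling are assumptions):
def _fetch_with_retry(url, max_retries=3):
    retries = 0
    while True:
        try:
            request = urllib2.Request(url, None, headers)
            return urllib2.urlopen(request).read()
        except HTTPError as e:
            logger.warning('Error during fetching, retry %d', retries)
            if retries == max_retries:
                raise e
            retries += 1
            sleep(1)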
def consume_pred(raw_topic, offset, det_path, model_path, display_topic,
                 event):
    # logger
    pre_logger = setup_logger('pre_log', 'logs/predicter_log.log',
                              logging.DEBUG)
    pre_logger.info("New prediction starts")

    # producer to publish to display topic
    producer = connect_kafka_producer()
    # face detector to crop faces
    detector = cv2.CascadeClassifier(det_path)
    # load sentiment prediction model
    model = get_model()
    model.load_weights(model_path)
    # consume raw images
    consumer = KafkaConsumer(raw_topic,
                             auto_offset_reset=offset,
                             bootstrap_servers=['localhost:9092'],
                             api_version=(0, 10),
                             consumer_timeout_ms=1000)
    key = bytes('display', encoding='utf-8')
    # predict one by one and publish the predicted result
    flag = True
    num_trials = 5
    while flag:
        if num_trials == 0:
            pre_logger.debug('Consumer has been idling for 5 times')
            break
        for im in consumer:
            # terminate
            if event.is_set():
                # quit two layers of loop
                flag = False
                break
            im = im.value
            try:
                im = np.frombuffer(im, dtype=np.uint8)
                im = cv2.imdecode(im, cv2.IMREAD_COLOR)
                # predict
                preds = detect_and_predict(detector, im, model)
                # push to kafka topic
                pre_logger.debug('{} faces detected in current frame'.format(
                    len(preds)))
                for p in preds:
                    try:
                        p_bytes = bytes(str(p), encoding='utf-8')
                        producer.send(display_topic, key=key, value=p_bytes)
                        producer.flush()
                        pre_logger.debug(
                            'results {} predicted and published successfully'.
                            format(p))
                    except Exception as ex:
                        pre_logger.debug(
                            'A problem occurred when publishing to display topic'
                        )
                        pre_logger.debug(str(ex))
            except Exception as ex:
                pre_logger.debug('failed')
                pre_logger.debug(ex)

        num_trials -= 1
    if producer is not None:
        producer.close()
    consumer.close()
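
# detect_and_predict() is not shown on this page; a plausible sketch given the Haar-cascade
# detector and Keras-style model loaded above (the input size and preprocessing are guesses):
import cv2
import numpy as np

def detect_and_predict(detector, frame, model, size=(48, 48)):
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = detector.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)
    preds = []
    for (x, y, w, h) in faces:
        face = cv2.resize(gray[y:y + h, x:x + w], size).astype('float32') / 255.0
        face = face.reshape(1, size[0], size[1], 1)
        preds.append(int(np.argmax(model.predict(face), axis=1)[0]))
    return preds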
from bs4 import BeautifulSoup
from datetime import datetime
from pymongo import MongoClient
import re
import sys
from time import sleep
from urllib2 import HTTPError
import urllib2

from tools import renew_ip, headers, setup_logger, exception_hook
import os


logger = setup_logger('b92statistike-scrape_newsmetadata.log')

re_extract_link = re.compile('(yyyy=([0-9]+))*(&mm=([0-9]+))*(&dd=([0-9]+))*(&nav_category=[0-9]+)*[&?]nav_id=([0-9]+)')
re_hour_minute = re.compile('([0-9][0-9]):([0-9][0-9])')

def _insert_news_metadata_category(db, category_id):
    """
    Reads news metadata from one news category and inserts it into Mongo.
    We go back in time: first we skip news published after 2015, then we
    process those from 2015, and we stop once we hit news earlier than 2015.
    There is simple retry logic if we get any HTTP error. All pages are
    saved locally and we use those first if they exist.
    The function is completely idempotent.
    """
    start = 0 # start is the URL parameter for the B92 link
    is_below_2015 = False
    while not is_below_2015:
Example #12
_ = checks_extended.AbstractCheck

import logging
import os
import tempfile
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed

import osmapi
import requests
import simplejson
from jinja2 import Environment, PackageLoader

import tools
from engine import CheckEngine, Result
from sources.source_factory import SourceFactory

logger = tools.setup_logger(logging_level=logging.INFO)


def process_entity(entity, context):
    """
    Takes one entity and performs all check with engine on it.
    :param entity: Entity to check
    :param context: Context
    :return: List of all performed checks
    """
    map_check = context['map-check']
    cr = CheckEngine(map_check['checks'], entity, context)
    return cr.check_all()
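
# A minimal usage sketch for process_entity with the executors imported above (the
# entities iterable and context dict come from the project's map-check setup; names assumed):
def run_checks_parallel(entities, context, max_workers=4):
    all_checks = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(process_entity, entity, context) for entity in entities]
        for future in as_completed(futures):
            all_checks.extend(future.result())
    return all_checks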


def generate_report(context, all_checks):
Example #13
print(
    tools.red('    ooo   oo   ooo                       ') +
    tools.yellow('                                     ') +
    tools.green('                       ooo   oo   ooo    '))
print(
    tools.red('       oooooooo      ====================') +
    tools.yellow('=====================================') +
    tools.green('====================      oooooooo       '))
print('')

start = timer()

# 1.  get the command line arguments 'args' and all valid build combinations in the check directory from 'builds.ini'
args, builds = args_parser.getArgsAndBuilds()

# 2.  set the logger 'log' with the debug level from 'args' to determine the level of logging which displays output to the user
tools.setup_logger(args.debug)
log = logging.getLogger('logger')

# 3.  perform the regression check by a) building executables
#                                     b) running the code
#                                     c) performing the defined analyses
check.PerformCheck(start, builds, args, log)

# 4.  display the summary table with information for each build, run and analysis step
summary.SummaryOfErrors(builds, args)

# 5.  display if regression check was successful or not and return the corresponding error code
summary.finalize(start, 0, check.Run.total_errors,
                 check.ExternalRun.total_errors, check.Analyze.total_errors,
                 check.Analyze.total_infos)
def main():
    # Collect the arguments, if any were provided.
    parser = tools.argument_parser()
    args = parser.parse_args()

    # Initiate logger settings
    tools.setup_logger(log_level=args.log_level, log_type=args.log_type)

    # Get the input data and convert it to an ordered list of dictionaries
    csv_file = tools.OpenFile.gui_ask_open_csv()
    csv_file = tools.OpenFile.process_csv(csv_file)
    address_table = tools.table_to_dictionary(csv_file)

    if len(address_table) == 0:
        logging.warning("No data was provided.")
        return {'status': False, 'data': None}

    # Convert the address_table to a list of Address_FQDN objects
    # First, clean up hostname data: remove the FQDN and keep only the hostname
    for address in address_table:
        address['device_hostname'] = _clean_device_hostname(
            address['device_hostname'])
        # If an interface name was provided, we'll convert it to a hostname
        if 'interface_name' in address.keys():
            if len(address['interface_name']) > 0:
                try:
                    address['device_hostname'] = _clean_interface_hostname(
                        address['device_hostname'], address['interface_name'])
                except Exception as error:
                    logging.warning(
                        f"Object data build failed on:  {address['device_hostname']} - {address['ip_address']}\n{error}"
                    )

    for i, address in enumerate(address_table):
        address_data = {}
        address_data.update({
            'ipv4_address': address['ip_address'],
            'hostname': address['device_hostname']
        })
        if 'domain' in address.keys():
            address_data.update({'domain': address['domain']})
        else:
            address_data.update({'domain': None})
        address_table[i] = address_data
    address_objects = []
    if s.MULTITHREAD:
        with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
            for address in address_table:
                try:
                    executor.submit(_build_address_fqdn_object,
                                    address_objects, address)
                except Exception as error:
                    logging.warning(
                        f"Object data build failed on:  {address['hostname']} - {address['ipv4_address']}\n{error}"
                    )
    else:
        for address in address_table:
            try:
                address_objects.append(Address_FQDN(**address))
            except Exception as error:
                logging.warning(
                    f"Object data build failed on:  {address['hostname']} - {address['ipv4_address']}\n{error}"
                )

    # Reformat and save data to a spreadsheet
    if s.SAVE_TO_CSV:
        output_data = [[
            "FQDN", "PTR", "IP Address", "FLU Exists", "FLU Existing Value",
            "FLU Needs Update", "RLU Exists", "RLU Existing Value",
            "RLU Needs Update"
        ]]
        for obj in address_objects:
            output_data.append([
                obj.full_name, obj.ptr_record, obj.ip_address,
                obj.forward_lookup_exists, obj.forward_lookup_existing_value,
                obj.forward_lookup_needs_update, obj.reverse_lookup_exists,
                obj.reverse_lookup_existing_value,
                obj.reverse_lookup_needs_update
            ])
        tools.SaveFile.gui_ask_save_csv(output_data)

    return {'status': True, 'data': address_objects}
from bs4 import BeautifulSoup
import sys
from urllib2 import HTTPError
import urllib2

from tools import renew_ip, setup_logger, exception_hook, headers

logger = setup_logger('b92statistike-dump_categories.log')

if __name__ == '__main__':
    """
    This program reads all pages where categories are defined and prints them out.
    TODO: I used this only when data was in SQL, but even then I manually created inserts
    out of this dumped output. It should be more automatic, but it was a one-time thing.
    """
    sys.excepthook = exception_hook
    renew_ip()

    for i in range(1, 2000):
        url = 'http://www.b92.net/info/vesti/index.php?&nav_category={}'.format(i)
        logger.info('Fetching url %s', url)
        retries = 0
        try:
            request = urllib2.Request(url, None, headers)
            response = urllib2.urlopen(request)
        except HTTPError as e:
            logger.warning('Error during fetching')
            if retries == 3:
                raise e
            else:
                retries = retries + 1
Example #16
                        type=str,
                        help="Path of testing data")
    parser.add_argument("--model_path",
                        default='./data/sgns.sogounews.bigram-char',
                        type=str,
                        help="Path of Word2Vec model")
    parser.add_argument("--dim",
                        default=300,
                        type=int,
                        help="Dimension of word vector")
    args = parser.parse_args()

    logger_file = os.path.join(
        'log', 'preprocess',
        '{}.log'.format(datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')))
    logger = setup_logger('preprocess', logger_file)

    logger.info(f'''===> Preprocessing <===
        Train Path:      {args.train_path}
        Test Path:       {args.test_path}
        Model Path:      {args.model_path}
        Output Path:     {'./dataset'}
        Dimension:       {args.dim}
    ''')

    logger.info('Loading Word2Vec model')
    model = load_model(args.model_path)
    logger.info('Loading Word2Vec model completed!')

    logger.info('Preprocessing training dataset')
    divide_dataset(path=args.train_path,
Example #17
# -*- coding: UTF-8 -*-

import emoji
from pymongo import MongoClient
import pymssql
import sys

from tools import setup_logger, exception_hook

DB_SERVER = '<add-your-server>.database.windows.net'
DB_USERNAME = '******'
DB_PASSWORD = '******'
DB_NAME = 'b92'

logger = setup_logger('b92statistike-mongo2sql.log')

def workaround_freetds_bug(text):
    """
    Emoticons in Instagram posts are outside of the 0xffff unicode range and
    TDS doesn't like this. We need to use the emoji package to convert
    those pesky emoticons to text + there are some other emoticons
    where emoji fails; I guess I should update the emoji DB.
    """
    text = emoji.demojize(text)
    text = text.replace(u'🇫󾓮', u' ')
    text = text.replace(u'🇺', u' ')
    return text

def insert_one_news(news, cursor):
    """Inserts one news item from a dictionary into SQL (including all of its comments)."""
Example #18
import logging
import tools
from gen.mvasmListener import *

logger = tools.setup_logger('sem_logger', 'sem.log', logging.DEBUG)


class Sem(mvasmListener):
    def __init__(self, context):
        self.MemPointer = 0
        self.Sequence = []
        self.Labels = {}
        self.Context = context

    def add(self, val):
        self.Sequence.append(val)

    def pop(self):
        a = self.Sequence.pop()
        return a

    # Enter a parse tree produced by mvasmParser#program.
    def enterProgram(self, ctx: mvasmParser.ProgramContext):
        logger.info("")
        if self.Context['stage'] == 1:
            self.MemPointer = 0

    # Exit a parse tree produced by mvasmParser#program.
    def exitProgram(self, ctx: mvasmParser.ProgramContext):
        logger.info("")