Code example #1
File: request.py Project: Snailwicked/spider_manage
    def __init__(self,
                 url: str,
                 method: str = 'GET',
                 *,
                 callback=None,
                 load_js: bool = False,
                 metadata: dict = None,
                 headers: dict = None,
                 request_config: dict = None,
                 request_session=None,
                 res_type: str = 'text',
                 **kwargs):
        """
        Initialization parameters
        """
        self.url = url
        self.method = method.upper()
        if self.method not in self.METHOD:
            raise ValueError('%s method is not supported' % self.method)

        self.callback = callback
        self.load_js = load_js
        self.headers = headers
        self.metadata = metadata if metadata is not None else {}
        self.request_session = request_session
        if request_config is None:
            self.request_config = self.REQUEST_CONFIG
        else:
            self.request_config = request_config
        self.res_type = res_type
        self.kwargs = kwargs

        self.close_request_session = False
        self.logger = get_logger(name=self.name)
        self.retry_times = self.request_config.get('RETRIES', 3)
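A minimal usage sketch for this constructor, assuming the enclosing class is named Request and defines the name, METHOD, and REQUEST_CONFIG attributes the snippet references; the parse callback is purely hypothetical:

    def parse(response):  # hypothetical callback
        print(response)

    req = Request('https://www.github.com', method='get',
                  callback=parse, metadata={'page': 1})
    assert req.method == 'GET'       # normalized by upper()
    assert req.retry_times == 3      # default when RETRIES is absent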
Code example #2
File: tests.py Project: medillcmip/Radregator
    def test_logfile_created(self):
        from tempfile import mkdtemp
        from core.utils import get_logger
        from os.path import isfile

        filename = "%s/sourcerer.log" % mkdtemp() 
        logger = get_logger('test', filename)

        if not isfile(filename):
            self.fail("Log file %s not created by get_logger()" % filename)
Code example #3
File: tests.py Project: medillcmip/Radregator
    def test_log_filename_not_writeable(self):
        from core.utils import get_logger

        filename = "%s/sourcerer.log" % self._get_nonexistant_directory_name()

        try:
            logger = get_logger('test', filename)
        except IOError:
            # get_logger() should handle the IOError itself rather than let it propagate
            self.fail("IOError from a nonexistent logfile directory should be handled inside get_logger().")
Code example #4
File: spider.py Project: Snailwicked/spider_manage
    def __init__(self, middleware, loop=None):
        if not self.start_urls or not isinstance(self.start_urls, list):
            raise ValueError(
                "Spider must have a param named start_urls, eg: start_urls = ['https://www.github.com']"
            )
        self.logger = get_logger(name=self.name)
        self.loop = loop or asyncio.new_event_loop()
        asyncio.set_event_loop(self.loop)
        self.request_queue = asyncio.Queue()
        self.sem = asyncio.Semaphore(getattr(self, 'concurrency', 3))
        self.middleware = middleware or Middleware()
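Because the constructor raises unless start_urls is a non-empty list, a subclass must declare it up front. A sketch of that wiring, keeping only the Spider, Middleware, and concurrency names from the snippet; everything else is assumed:

    class GithubSpider(Spider):          # hypothetical subclass
        name = 'github'
        start_urls = ['https://www.github.com']
        concurrency = 10                 # read via getattr() for the semaphore

    spider = GithubSpider(middleware=None)   # None falls back to Middleware()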
Code example #5
    def __init__(self, bucket, resource='s3'):
        self.bucket = bucket
        self.app_logger = get_logger('app')

        # Refresh the shared boto3 session once the cached one expires
        if 'session' not in self.session or self.session['expire'] < datetime.utcnow():
            self.session.update({
                'session': boto3.Session(
                    aws_access_key_id=configs.AWS_ACCESS_KEY_ID,
                    aws_secret_access_key=configs.AWS_SECRET_ACCESS_KEY),
                'expire': datetime.utcnow() + timedelta(hours=1),
            })
        # Cache one client per resource type
        if resource not in self.clients:
            self.clients[resource] = self.session['session'].client(resource)
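For this snippet to run, session and clients must already exist as dict-like attributes shared across instances, i.e. class-level caches. A sketch of that assumed scaffolding, with only the attribute names taken from the snippet:

    class S3Client:        # hypothetical enclosing class
        session = {}       # shared boto3 session cache
        clients = {}       # one cached client per resource type

        # ... __init__ as above ...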
Code example #6
File: tests.py Project: medillcmip/Radregator
    def test_log_error(self):
        from core.utils import get_logger
        from os.path import isfile
        from tempfile import mkdtemp

        filename = "%s/sourcerer.log" % mkdtemp() 

        logger = get_logger('test', filename)

        error_msg = "This is an error."

        logger.error(error_msg)

        with open(filename) as f:
            line = f.readline()

        # The most recent log entry in the file should contain our error message
        self.assertNotEqual(line.find(error_msg), -1)
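Taken together, the three Radregator tests (examples #2, #3, and #6) pin down a contract for core.utils.get_logger(name, filename): it creates the log file, writes error records to it, and handles an unwritable path itself instead of letting IOError propagate. A minimal sketch that would satisfy them; the console fallback is an assumption, not the project's actual code:

    import logging

    def get_logger(name, filename=None):
        logger = logging.getLogger(name)
        logger.setLevel(logging.DEBUG)
        try:
            if filename:
                logger.addHandler(logging.FileHandler(filename))
        except IOError:
            # Unwritable path: fall back to the console instead of raising
            logger.addHandler(logging.StreamHandler())
        return logger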
Code example #7
def evaluate_line(input_str):
    config = load_config(file_path + "/" + FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(file_path + "/" + FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, file_path + "/" + FLAGS.ckpt_path,
                             load_word2vec, config, id_to_char, logger)

        report = input_str
        result = model.evaluate_line(sess, input_from_line(report, char_to_id),
                                     id_to_tag)
        print(result)
        return result
Code example #8
File: collect.py Project: gallantlab/realtimefmri
import sys
import os
import time
import argparse
from core.collection import DataCollector
from core.utils import get_logger
logger = get_logger('collect', dest=['console', 'file'])

if __name__ == "__main__":

    parser = argparse.ArgumentParser(description='Collect data')
    parser.add_argument('-s', '--simulate',
                        action='store',
                        dest='simulate',
                        default=None,
                        help='Simulate data collection')
    parser.add_argument('-i', '--interval',
                        action='store',
                        dest='interval',
                        default='2',
                        help='Interval between scans, in seconds. Only active if simulate is True')
    parser.add_argument('-d', '--directory',
                        action='store',
                        dest='directory',
                        default='tmp',
                        help='Directory to watch')
    parser.add_argument('-p', '--parent',
                        action='store_true',
                        dest='parent',
                        default=False,
                        help='Monitor the provided directory for the first new folder, then monitor that folder for new files')
Code example #9
import os

import pyhocon
import torch
import torchvision

from core.engine import train, evaluate, LargerHolder
from core.metric import AverageMetric, AccuracyMetric
from core.model import cifar_resnet20
from core.loss import CACLoss
from core.utils import get_args, get_logger
from core.utils import set_cudnn_auto_tune
from core.utils import FLOPs
from core.utils import replace_convs_with_cac

if __name__ == "__main__":
    args = get_args()
    hocon = pyhocon.ConfigFactory.parse_file(args.config)
    output_directory = args.output_directory
    os.makedirs(output_directory, exist_ok=False)
    logger = get_logger("train", output_directory)

    set_cudnn_auto_tune()

    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    train_transform = torchvision.transforms.Compose([
        torchvision.transforms.RandomCrop(size=32, padding=4),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            mean=hocon.get_list("dataset.mean"),
            std=hocon.get_list("dataset.std"),
        ),
    ])
    val_transform = torchvision.transforms.Compose([
Code example #10
import requests
from bs4 import BeautifulSoup as bs
import traceback

from core.db import insert
from core.utils import (
    parse_time,
    headers,
    logger_time,
    get_logger,
)

SRC_URL = 'http://openinsider.com/screener?s={}&o=&pl=&ph=&ll=&lh=&fd=0&fdr=&td=0&tdr=&fdlyl=&fdlyh=&daysago=&xp=1&xs=1&vl=&vh=&ocl=&och=&sic1=-1&sicl=100&sich=9999&grp=0&nfl=&nfh=&nil=&nih=&nol=&noh=&v2l=&v2h=&oc2l=&oc2h=&sortcol=0&cnt=10000000&page=1'

logger = get_logger('openinsider-service')


def get_request(insider_url, ticker):
    logger.info(f'Get request with tickername: {ticker}')
    session = requests.Session()
    data = session.get(insider_url.format(ticker),
                       headers=headers,
                       stream=True)
    return data.text


@logger_time
def get_data(html):
    soup = bs(html, 'lxml')
    rows = soup.find('table', class_='tinytable').find('tbody').find_all('tr')
    for i in rows:
Code example #11
from typing import Dict, List, Tuple, AnyStr
from datetime import datetime

import requests
from pytz import timezone

from core.utils import (
    parse_time,
    headers,
    get_logger,
    url,
    logger_time,
)
from core.db import insert

logger = get_logger('pulse-service')


def get_cursor_number(url: str, ticker: str, cursor='9999999') -> Dict:
    session = requests.Session()
    logger.info(f'Get cursor number from {url.format(ticker, cursor)}')
    data = session.get(url.format(ticker, cursor),
                       headers=headers,
                       stream=True)
    logger.info(
        f"Prev cursor number is {data.json()['payload']['nextCursor']}")
    return data.json()['payload']['nextCursor']


def get_data_from_api(url: str, ticker: str, cursor: str) -> None:
    session = requests.Session()
Code example #12
File: preprocess.py Project: gallantlab/realtimefmri
import os
import time
import argparse
from core.preprocessing import Preprocessor
from core.utils import get_logger

logger = get_logger("preprocess", dest=["console", "file"])

if __name__ == "__main__":

    parser = argparse.ArgumentParser(description="Preprocess data")
    parser.add_argument("config", action="store", help="Name of configuration file")
    args = parser.parse_args()
    logger.info("Loading preprocessing pipeline from %s" % args.config)

    preproc = Preprocessor(args.config)
    preproc.run()
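The realtimefmri snippets here and in examples #8 and #14 call get_logger with a dest list selecting console and/or file output. That call pattern implies roughly the following shape; this is a sketch of the interface the calls assume, not the project's implementation, and the log path is an invented placeholder:

    import logging

    def get_logger(name, dest=('console',)):
        logger = logging.getLogger(name)
        logger.setLevel(logging.DEBUG)
        if 'console' in dest:
            logger.addHandler(logging.StreamHandler())
        if 'file' in dest:
            logger.addHandler(logging.FileHandler('%s.log' % name))  # assumed path
        return logger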
Code example #13
def train():
    # Load the datasets
    train_sentences = load_sentences(FLAGS.train_file, FLAGS.lower,
                                     FLAGS.zeros)
    dev_sentences = load_sentences(FLAGS.dev_file, FLAGS.lower, FLAGS.zeros)
    test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower, FLAGS.zeros)

    # Choose the tagging scheme (IOB / IOBES); IOBES is used by default
    update_tag_scheme(train_sentences, FLAGS.tag_schema)
    update_tag_scheme(test_sentences, FLAGS.tag_schema)

    if not os.path.isfile(FLAGS.map_file):
        if FLAGS.pre_emb:
            dico_chars_train = char_mapping(train_sentences, FLAGS.lower)[0]
            dico_chars, char_to_id, id_to_char = augment_with_pretrained(
                dico_chars_train.copy(), FLAGS.emb_file,
                list(
                    itertools.chain.from_iterable([[w[0] for w in s]
                                                   for s in test_sentences])))
        else:
            _c, char_to_id, id_to_char = char_mapping(train_sentences,
                                                      FLAGS.lower)

        # Create a dictionary and a mapping for tags
        _t, tag_to_id, id_to_tag = tag_mapping(train_sentences)
        with open(FLAGS.map_file, "wb") as f:
            pickle.dump([char_to_id, id_to_char, tag_to_id, id_to_tag], f)
    else:
        with open(FLAGS.map_file, "rb") as f:
            # {'S-LOC': 10, 'E-LOC': 3, 'B-ORG': 4, 'S-PER': 11, 'S-ORG': 12, 'O': 0,
            # 'E-ORG': 5, 'I-LOC': 6, 'I-PER': 7, 'I-ORG': 1, 'B-PER': 8, 'B-LOC': 2, 'E-PER': 9}
            char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)

    # Convert the data to numeric IDs
    train_data = prepare_dataset(train_sentences, char_to_id, tag_to_id,
                                 FLAGS.lower)
    dev_data = prepare_dataset(dev_sentences, char_to_id, tag_to_id,
                               FLAGS.lower)
    test_data = prepare_dataset(test_sentences, char_to_id, tag_to_id,
                                FLAGS.lower)
    print("%i / %i / %i sentences in train / dev / test." %
          (len(train_data), len(dev_data), len(test_data)))

    # Pad short sequences with zeros
    train_manager = BatchManager(train_data, FLAGS.batch_size)
    dev_manager = BatchManager(dev_data, 100)
    test_manager = BatchManager(test_data, 100)

    make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)
    else:
        config = config_model(char_to_id, tag_to_id)
        save_config(config, FLAGS.config_file)

    log_path = os.path.join("log", FLAGS.log_file)
    logger = get_logger(log_path)
    print_config(config, logger)

    # GPU settings
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    steps_per_epoch = train_manager.len_data

    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        logger.info("start training")
        loss = []
        for i in range(100):  # train for 100 epochs
            for batch in train_manager.iter_batch(shuffle=True):
                step, batch_loss = model.run_step(sess, True, batch)
                loss.append(batch_loss)
                if step % FLAGS.steps_check == 0:
                    iteration = step // steps_per_epoch + 1
                    logger.info("iteration:{} step:{}/{}, "
                                "NER loss:{:>9.6f}".format(
                                    iteration, step % steps_per_epoch,
                                    steps_per_epoch, np.mean(loss)))
                    # Log the mean loss, then reset it every steps_check steps
                    loss = []

            best = evaluate(sess, model, "dev", dev_manager, id_to_tag, logger)
            if best:
                save_model(sess, model, FLAGS.ckpt_path, logger)
            evaluate(sess, model, "test", test_manager, id_to_tag, logger)
Code example #14
File: stimulate.py Project: gallantlab/realtimefmri
#!/usr/bin/env python
import os
import time
import argparse
from core.stimulation import Stimulator
from core.utils import get_logger
logger = get_logger('stimulate', dest=['console', 'file'])

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Preprocess data')
    parser.add_argument('config',
                        action='store',
                        nargs='?',
                        default='stim-01',
                        help='Name of configuration file')
    args = parser.parse_args()

    stim = Stimulator(args.config)
    stim.run()  # this will start an infinite run loop
Code example #15
File: forms.py Project: medillcmip/Radregator
from django import forms
from django.forms.widgets import Select, HiddenInput
from core.models import Comment,CommentType,Topic
from tagger.models import Tag
from users.models import UserProfile
from django.forms.widgets import CheckboxSelectMultiple
from core import utils

logger = utils.get_logger(__name__)

class CommentDeleteForm(forms.Form):
    allcomments = Comment.objects.filter(is_deleted=False).filter(is_parent=True)
    comments = forms.ModelMultipleChoiceField(allcomments)


class CommentTopicForm(forms.Form):
    allcomments = Comment.objects.filter(is_deleted=False).filter(is_parent=True)
    alltopics = Topic.objects.filter(is_deleted=False)
    comment = forms.ModelChoiceField(allcomments, empty_label=None)
    topic = forms.ModelChoiceField(alltopics, empty_label=None)

    
class TopicDeleteForm(forms.Form):
    alltopics = Topic.objects.filter(is_deleted=False)
    topics = forms.ModelMultipleChoiceField(alltopics)


class NewSummaryForm(forms.Form):
    """ Form to let a user create a new summary for a topic"""

    alltopics = Topic.objects.filter(is_deleted=False)