def __getitem__(self, idx):
    """Return one sample dict: 'data', its unpadded 'length', and (train only) 'label'."""
    # vocab_size - 1 is filtered out when counting length -- presumably the
    # padding token id; 'length' counts the non-pad entries.
    pad_id = Cfg().vocab_size - 1
    if self.phase == 'train':
        data = self.data[0][idx]
        return {
            'data': data,
            'length': len(data[data != pad_id]),
            'label': self.data[1][idx],
        }
    data = np.array(self.data[idx])
    return {
        'data': data,
        'length': len(data[data != pad_id]),
    }
def __init__(self, config_file):
    """
    :param config_file: The .ini type configuration text file.
    """
    settings = Cfg(config_file)  # parse the .ini configuration
    # libpq-style connection string assembled from the config values
    self.conn_string = (
        "host=%s port=%s dbname=%s user=%s password=%s"
        % (settings.pg_host, settings.pg_port, settings.pg_db,
           settings.pg_lgn, settings.pg_pwd)
    )
def __init__(self, ngpu):
    """DCGAN-style generator: maps a latent vector z to an (nc) x 64 x 64 image.

    :param ngpu: number of GPUs available -- stored for data-parallel use.
    """
    super(Generator, self).__init__()
    cfg = Cfg()
    self.ngpu = ngpu
    # nz: latent vector size, ngf: generator feature-map base width, nc: image channels
    nz, ngf, nc = cfg.nz, cfg.ngf, cfg.nc
    self.main = nn.Sequential(
        # input is Z, going into a convolution
        nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
        nn.BatchNorm2d(ngf * 8),
        nn.ReLU(True),
        # state size. (ngf*8) x 4 x 4
        nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
        nn.BatchNorm2d(ngf * 4),
        nn.ReLU(True),
        # state size. (ngf*4) x 8 x 8
        nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
        nn.BatchNorm2d(ngf * 2),
        nn.ReLU(True),
        # state size. (ngf*2) x 16 x 16
        nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
        nn.BatchNorm2d(ngf),
        nn.ReLU(True),
        # state size. (ngf) x 32 x 32
        nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
        nn.Tanh()
        # state size. (nc) x 64 x 64
    )
def __init__(self): self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 设备 self.vec = os.path.join(Cfg().word2vec_from_scratch, 'emb_weights.pkl') self.vocab = os.path.join(Cfg().word2vec_from_scratch, 'vocab.pkl') self.dropout = 0.1 # 随机失活 self.require_improvement = 1000 # 若超过1000batch效果还没提升,则提前结束训练 self.num_classes = 7 # 类别数 self.n_vocab = 0 # 词表大小,在运行时赋值 self.num_epochs = 20 # epoch数 self.info_freq = 100 self.val_freq = 3000 self.model_save_path = Cfg().checkpoint['senti'] self.batch_size = 16 # mini-batch大小 self.pad_size = 32 # 每句话处理成的长度(短填长切) self.learning_rate = 1e-3 # 学习率 self.embed = 100
def label_data():
    """Sentiment-label crawled sentences grouped by topic.

    Reads "<content> <topic>" lines from the crawler output, classifies each
    content with the attention-LSTM model, maps the 7-way class to a
    (polarity, intensity) pair, then dumps the result to a pickle and an
    Excel workbook (one sheet per topic).
    """
    from utils.lstm_attn_utils import label_dict
    from tqdm import tqdm
    import pandas as pd
    res = {'心理方面': {'content': [], 'polarity': [], 'intensity': []},
           '学业方面': {'content': [], 'polarity': [], 'intensity': []},
           '职业发展': {'content': [], 'polarity': [], 'intensity': []},
           '恋爱关系': {'content': [], 'polarity': [], 'intensity': []}}
    with open(os.path.join(Cfg().crawler_save_path, 'new_result_notopic.txt'),
              'r', encoding='utf-8') as f:
        for line in tqdm(f):
            # each line is "<content> <topic>"; split once instead of twice
            parts = line.split(' ')
            content, topic = parts[0].strip(), parts[1].strip()
            res[topic]['content'].append(content)
            # class mapping: 0 = neutral, 1-3 = positive (low/mid/high),
            # 4-6 = negative (low/mid/high)
            senti = label_dict[lstm_attn_test(content)]
            if senti == 0:
                polarity, intensity = 0, 0
            elif senti <= 3:
                polarity, intensity = 1, senti
            else:
                polarity, intensity = -1, senti - 3
            res[topic]['polarity'].append(polarity)
            res[topic]['intensity'].append(intensity)
    # NOTE(review): mixes module-level `cfg` with Cfg() above -- presumably the
    # same configuration; consider using one consistently.
    with open(os.path.join(cfg.tmp, 'tmp.pkl'), 'wb') as fp:
        pickle.dump(res, fp)
    # BUG FIX: was os.path.join(cfg.labeled, '\output.xlsx') -- the leading
    # backslash is a literal character, producing a file literally named
    # '\output.xlsx' on POSIX instead of 'output.xlsx' under cfg.labeled.
    writer = pd.ExcelWriter(os.path.join(cfg.labeled, 'output.xlsx'))
    for topic in res:
        pd.DataFrame(res[topic]).to_excel(writer, topic)
    writer.save()  # NOTE(review): removed in pandas >= 2.0; use writer.close() there
def main():
    """Entry point: parse CLI arguments, build the option set, and dispatch
    to the selected sub-command. Returns True on success, False on interrupt."""
    ok = True
    args = docopt(USAGE, version=VERSION)
    conf = Cfg(args['--cfg'])
    opts = conf.get_configs()
    # fold the CLI flags into the config-derived options
    opts['dry'] = args['--dry']
    opts['profile'] = args['--profile']
    opts['safe'] = not args['--force']
    opts['installdiff'] = not args['--nodiff']
    opts['link'] = args['--link']
    opts['quiet'] = not args['--verbose']
    header()
    try:
        if args['list']:
            # list existing profiles
            list_profiles(conf)
        elif args['listfiles']:
            # list files for the selected profile
            list_files(opts, conf)
        elif args['install']:
            # install the dotfiles stored in dotdrop
            ok = install(opts, conf)
        elif args['compare']:
            # compare local dotfiles with the ones stored in dotdrop
            tmp = utils.get_tmpdir()
            if not compare(opts, conf, tmp, args['--files']):
                os.rmdir(tmp)
            else:
                LOG.raw('\ntemporary files available under %s' % (tmp))
        elif args['import']:
            # import dotfile(s)
            importer(opts, conf, args['<paths>'])
    except KeyboardInterrupt:
        LOG.err('interrupted')
        ok = False
    return ok
def __init__(self):
    """Build the RAN model and its training counterpart from the global config."""
    # original note: "what is an RAN? External memory" -- semantics live in InitModel
    self.conf = Cfg()
    self.InitModel()
    self.InitTrainingModel()
def __init__(self, model_):
    """Initialise a tracker wrapping the given model (possibly pre-trained /
    weight-sharing, per the original author's note)."""
    self.cfg = Cfg()
    self.model = model_
    # rolling buffer over the most recent track: 4 values per lookback step
    self.memory = np.zeros((4, self.cfg.lookbackLength))
    self.bboxHistory = []
    self.state = TrackerState.INIT
def __init__(self) -> None:
    """Load the global configuration file and resolve the browser profile path."""
    super().__init__()
    log.info("Initializing Global configuration...")
    # See http://docs.red-dove.com/cfg/python.html#getting-started-with-cfg-in-python
    # for how to use Config
    global_config = Cfg(GLOBAL_CONFIG_FILE)
    self.global_config = global_config
    self.fairgame_config = global_config.get("FAIRGAME")
    self.profile_path = None
    self.get_browser_profile_path()
def crawl():
    """ Crawl all the Activity Streams. """
    settings = Cfg().cfg
    stream_urls = settings['activity_stream_list']
    db_engine = db_setup(settings['db_uri'])
    log('- - - - - - - - - - START - - - - - - - - - -')
    log('Going through {} activity stream(s).'.format(len(stream_urls)))
    # crawl each configured stream in order
    for stream_url in stream_urls:
        crawl_single(stream_url, db_engine)
    log('- - - - - - - - - - END - - - - - - - - - -')
def log(msg):
    """ Append a timestamped message to the configured log file. """
    fn = Cfg().cfg['log_file']
    # second-resolution timestamp (drop the microseconds part)
    timestamp = str(datetime.datetime.now()).split('.')[0]
    # /dev/stdout cannot be opened for append
    # (https://www.bugs.python.org/issue27805), so use 'w' for it.
    # stat.S_ISCHR(os.stat(fn).st_mode) proved unreliable inside an alpine
    # docker container running canvas indexer with gunicorn (although it works
    # on a python shell in the container), hence the plain string compare.
    mode = 'w' if fn == '/dev/stdout' else 'a'
    with open(fn, mode) as f:
        f.write('[{}] {}\n'.format(timestamp, msg))
def get_topic_senti_att(sentence):
    """Classify one sentence with the attention-LSTM sentiment model.

    Returns (predicted_class, attention_weights) for the single batch, or
    None (implicitly) when the sentence yields no numeric data.
    """
    import torch
    from torch.utils.data import DataLoader
    from corpus_dataloader import PakedData
    from lstm_attn import AttnModel
    model = AttnModel()
    model.load_state_dict(
        torch.load(os.path.join(Cfg().checkpoint['senti'], '5ABLstm_24.ckpt')))
    model.eval()
    numeric_data = build_lstm_test_data(sentence=sentence)
    if len(numeric_data) <= 0:
        # nothing to classify -- caller receives None
        return
    test_data = PakedData(numeric_data, phase='test')
    test_iter = DataLoader(test_data, batch_size=1)
    # batch_size is 1, so this loop returns on the first (only) sample
    for sample in test_iter:
        att, pred = model(sample['data'][0].long(), sample['length'], 'test')
        predict = torch.max(pred.data, 1)[1].cpu().numpy()
        return int(predict[0]), list(att)
def test_dogvscat():
    """Train and validate the VGG model on the dogs-vs-cats split."""
    # NOTE(review): Cfg class attributes and a cfg instance are both used below
    # -- presumably equivalent; consider picking one consistently.
    train_li = LoadImage(Cfg.train_root, train=True, rate=Cfg.split_rate)
    vali_li = LoadImage(Cfg.train_root, train=False, rate=Cfg.split_rate)
    # "{0} images used for training, {1} for validation"
    print("一共有{0}张图片参与训练,{1}张图片参与验证".format(len(train_li), len(vali_li)))
    #test_li = LoadImage(test_root,train=False,test=True)
    cfg = Cfg()
    train_li_loader = train_li.data_loader(train_li, batch_size=Cfg.batch_size,
                                           shuffle=True,
                                           num_workers=Cfg.num_workers)
    # validation uses batch_size=1
    vali_li_loader = vali_li.data_loader(vali_li, batch_size=1, shuffle=True,
                                         num_workers=Cfg.num_workers)
    vgg = Vgg()
    vgg.fit(vgg, train_li_loader, vali_li_loader, max_item=cfg.max_item,
            lr=cfg.lr, mu=Cfg.lr_rate)
def __init__(self, hidden_size=128, input_size=100, bidirectional=True,
             num_layers=2, num_classes=7):
    """Bi-directional LSTM classifier on top of frozen pre-trained embeddings.

    :param hidden_size: LSTM hidden state width.
    :param input_size: token embedding dimension fed to the LSTM.
    :param bidirectional: run the LSTM in both directions.
    :param num_layers: number of stacked LSTM layers.
    :param num_classes: size of the output layer.
    """
    super(BiLstm, self).__init__()
    # FIX: open the weights file in a `with` block -- the original leaked the
    # file handle (open() inside pickle.load() is never closed).
    emb_path = os.path.join(Cfg().word2vec_from_scratch, 'emb_weights.pkl')
    with open(emb_path, 'rb') as fp:
        weights = torch.from_numpy(pickle.load(fp)).float()
    self.embeddings = nn.Embedding.from_pretrained(weights, freeze=True)
    self.model_name = 'BLSTM'
    self.hidden_size = hidden_size
    self.input_size = input_size
    self.bidirectional = bidirectional
    self.num_layers = num_layers
    self.num_classes = num_classes
    self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers,
                        batch_first=True, bidirectional=bidirectional)
    # both directions are concatenated, hence the doubled input width
    self.fc = nn.Linear(
        self.hidden_size * 2 if self.bidirectional else self.hidden_size,
        self.num_classes
    )
def __init__(self, ngpu):
    """Convolutional discriminator/critic for (nc) x 64 x 64 images.

    Outputs a raw score (no sigmoid) -- consistent with a WGAN critic;
    confirm against the training loop.

    :param ngpu: number of GPUs available -- stored for data-parallel use.
    """
    super(Discriminator, self).__init__()
    cfg = Cfg()
    self.ngpu = ngpu
    # nc: image channels, ndf: discriminator feature-map base width
    nc, ndf = cfg.nc, cfg.ndf
    self.main = nn.Sequential(
        # input is (nc) x 64 x 64
        nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
        nn.LeakyReLU(0.2, inplace=True),
        # state size. (ndf) x 32 x 32
        nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
        nn.BatchNorm2d(ndf * 2),
        nn.LeakyReLU(0.2, inplace=True),
        # state size. (ndf*2) x 16 x 16
        nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
        nn.BatchNorm2d(ndf * 4),
        nn.LeakyReLU(0.2, inplace=True),
        # state size. (ndf*4) x 8 x 8
        nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
        nn.BatchNorm2d(ndf * 8),
        nn.LeakyReLU(0.2, inplace=True),
        # state size. (ndf*8) x 4 x 4
        nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False))
import myNet
from data.dataset import MyDataset
from config import DefaultConfig as Cfg
import torch
from torch import optim
from torchvision import models

# shared, module-level configuration instance
option = Cfg()


def train(**kwargs):
    # Define the training procedure; kwargs override config values.
    option.update_cfg(**kwargs)
    print('--------------')
    print('Running func: train')
    # Model:
    # net_type = getattr(myNet, option.model)
    # getattr() fetches an attribute from an object by its name string;
    # hasattr() tests for the member, setattr() assigns one.
    model = myNet.BriefNet()  # net_type() # net_type would be the class object; append () to instantiate
    if option.use_gpu:
        model.cuda()
    # Data:
    train_dataset = MyDataset(train=True)
    val_dataset = MyDataset(test=True)
    # Loss function and optimizer
    if option.use_gpu:
        criterion = torch.nn.NLLLoss().cuda()
    else:
        criterion = torch.nn.NLLLoss()
    # NOTE(review): snippet appears truncated here -- the remainder of train()
    # is not visible in this chunk.
payload['director'] = fields.get('director') payload['imdb_score'] = fields.get('imdb_score') payload['popularity'] = fields.get('99popularity') movie = Movie(**payload) session.add(movie) session.commit() genre_list = fields.get('genre') for genre in genre_list: name = genre.strip() redis_obj = r.get(name) if not redis_obj: genre = Genre(name=name) movie.genre.append(genre) session.flush() r.set(name, pickle.dumps(genre)) else: genre = pickle.loads(redis_obj) movie.genre.append(genre) session.flush() session.commit() session.close() if __name__ == "__main__": r.flushdb() CONF = Cfg(os.environ.get(constants.STAGE)) Base.metadata.create_all(CONF.engine) base_path = dir_path = os.path.dirname(os.path.realpath(__file__)) file_path = f"{base_path}/database/imdb.json" populate_movies(file_path, CONF.DB())
# encoding=utf-8 import pickle import os import re import pandas as pd import numpy as np from tqdm import tqdm import pkuseg from config import Cfg config = Cfg() seg = pkuseg.pkuseg() cop = re.compile("[^\u4e00-\u9fa5^.^,^,^a-z^A-Z^0-9]") topic_list = ['恋爱关系', '师生关系', '学业方面', '职业发展', '心理方面'] label_dict = { '中性': 0, '正向低': 1, '正向中': 2, '正向高': 3, '负向低': 4, '负向中': 5, '负向高': 6 } reverse_label_dict = {v: k for k, v in label_dict.items()} pad_size = 32 target = r'C:\Users\king\Documents\code\NLP\text_classification\data\lstm_train_corpus1.xlsx' word2vec = pickle.load(open(r'C:\Users\king\Documents\code\NLP\train_word2vec\vec.pkl', 'rb')) def filter_str(desstr, restr=''):
def list_profiles(conf):
    """Print every profile name known to the configuration."""
    LOG.log('Available profile(s):')
    for p in conf.get_profiles():
        LOG.sub(p)
    LOG.log('')


def header():
    """Print the program banner."""
    LOG.log(BANNER)
    LOG.log("")


if __name__ == '__main__':
    ret = True
    args = docopt(USAGE, version=VERSION)
    conf = Cfg(args['--cfg'])
    opts = conf.get_configs()
    # fold CLI flags into the config-derived options
    opts['dry'] = args['--dry']
    opts['profile'] = args['--profile']
    opts['safe'] = not args['--force']
    opts['installdiff'] = not args['--nodiff']
    opts['link'] = args['--link']
    header()
    try:
        if args['list']:
            list_profiles(conf)
            # NOTE(review): snippet truncated here -- remaining sub-commands
            # and the except clause are not visible in this chunk.
# Python 2 script fragment: consume command-line options parsed earlier
# (opts / configFile / csvFiles / uiType are defined before this chunk).
for o, a in opts:
    if o == '-c' or o == '--cfg':
        configFile = a
    elif o == '--csv':
        csvFiles.append(a)
    elif o == '--ui':
        uiType = a
    else:
        raise Exception('unknown option "%s"' % o)

# at least a config file or one CSV source is required
if not(configFile) and not(csvFiles):
    Usage()
    sys.exit(1)

if configFile:
    Cfg.loadFile(configFile)

# set up basic objects
store = DataStore()
sourceMgr = SourceManager(store)

# start GUI -- backend modules are imported lazily so only the chosen
# toolkit needs to be installed
if uiType == 'sdl':
    from sdl_output import SdlOutput
    widget = SdlOutput(None, store, sourceMgr)
elif uiType == 'qt':
    from qt_output import QtOutput
    widget = QtOutput(store, sourceMgr)
else:
    # Python 2 print statement
    print "invalid UI type '%s'" % uiType
    Usage()
from wgan import Generator, Discriminator, weights_init
from config import Cfg
import os
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.autograd import grad
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils

if __name__ == "__main__":
    # Read config
    cfg = Cfg()
    # Try to load dataloader from cache
    dataloader_path = os.path.join(cfg.cache, "dataloader.pt")
    if cfg.prepared_dataloader and os.path.exists(dataloader_path):
        dataloader = torch.load(dataloader_path)
    else:
        # Create the data set: resize/center-crop to image_size and normalize
        # each RGB channel to [-1, 1] (matches the generator's Tanh output)
        dataset = dset.ImageFolder(root=cfg.dataroot,
                                   transform=transforms.Compose([
                                       transforms.Resize(cfg.image_size),
                                       transforms.CenterCrop(cfg.image_size),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.5, 0.5, 0.5),
                                                            (0.5, 0.5, 0.5)),
                                   ]))
        # NOTE(review): snippet truncated here -- dataloader construction and
        # the training loop are not visible in this chunk.
def index():
    """Look up curation backlinks for a given canvas (optionally restricted
    to an xywh area) and return them as a Curation document (JSON).

    Query parameters: 'canvas' (required, URL-encoded canvas URI) and
    'xywh' (optional comma-separated ints "x,y,w,h").
    """
    canvas_uri_raw = request.args.get('canvas')
    area_xywh = request.args.get('xywh')
    if not canvas_uri_raw:
        return abort(400)
    canvas_uri = urllib.parse.unquote(canvas_uri_raw)
    cfg = Cfg()
    db_engine = create_engine(cfg.cfg['db_uri'])
    # canvas lookup uses a bound parameter (safe against injection)
    q_can = sqla_text(''' SELECT id, manifest_jsonld_id FROM canvases WHERE jsonld_id=:can_uri ''')
    can_db_tpls = db_engine.execute(q_can, can_uri=canvas_uri).fetchall()
    if not can_db_tpls:
        return abort(404)  # FIXME not there, respond accordingly
    else:
        if len(can_db_tpls) == 1:
            can_db_id = int(can_db_tpls[0]['id'])
            can_db_man_jsonld_id = can_db_tpls[0]['manifest_jsonld_id']
        else:
            print('multiple canvases w/ same ID (!!!)')  # FIXME problem
    area_query_insert = ''
    if area_xywh:
        # all four values are cast to int, so the string interpolation into
        # the SQL below cannot inject (raises ValueError on bad input instead)
        x, y, w, h = [int(elem) for elem in area_xywh.split(',')]
        # closed polygon: the first point is repeated as the fifth
        poly = ('ST_GeomFromText('
                '\'POLYGON(({} {}, {} {}, {} {}, {} {}, {} {}))\')').format(
            x, y, x+w, y, x+w, y+h, x, y+h, x, y
        )
        area_query_insert = 'ST_Within(area, {}) and '.format(poly)
    # interpolated values are an int id and the int-built polygon above
    q_area = '''SELECT curations.jsonld_id as uri, areajson FROM curations JOIN (SELECT curation_id, ST_AsGeoJSON(area) as areajson FROM curation_elements WHERE {} canvas_id = {}) as cue ON curations.id = cue.curation_id; '''.format(area_query_insert, can_db_id)
    cur_uris = db_engine.execute(q_area).fetchall()
    backlinks_flat = []
    for row in cur_uris:
        uri = row['uri']
        area = json.loads(row['areajson'])
        backlinks_flat.append([uri, area])
    # backlinks_by_uri = {}
    # for bl in backlinks_flat:
    #     uri, area = bl
    #     if uri not in backlinks_by_uri:
    #         backlinks_by_uri[uri] = {'areas':[]}
    #     backlinks_by_uri[uri]['areas'].append(area)
    # group curation URIs by the rectangular area they annotate
    backlinks_by_area = {}
    for bl in backlinks_flat:
        uri, area = bl
        coords = area['coordinates'][0]
        if not len(coords) == 5:
            print('unexpected polygon shape (!!!)')  # FIXME problem
        # recover "x,y,w,h" from the 5-point closed rectangle polygon
        p1, p2, p3, p4, p5 = coords
        xywh = '{},{},{},{}'.format(p1[0], p1[1], p2[0]-p1[0], p3[1]-p1[1])
        if xywh not in backlinks_by_area:
            backlinks_by_area[xywh] = []
        backlinks_by_area[xywh].append(uri)
    display_curation = build_annotation_container_curation(
        canvas_uri, can_db_man_jsonld_id, backlinks_by_area,
        request.url, request.base_url)
    # ret = {
    #     'canvas': canvas_uri,
    #     'curations_backlinks': backlinks_by_area
    # }
    return jsonify(display_curation)
def build_annotation_container_curation(
        canvas_uri, containing_manifest_uri, backlinks, query_url, base_url):
    """ Build a curation containing a single canvas that is annotated with
    curation backlinks.

    :param canvas_uri: URI of the canvas being annotated.
    :param containing_manifest_uri: manifest the canvas belongs to.
    :param backlinks: mapping of "x,y,w,h" area strings to lists of curation URIs.
    :param query_url: full request URL; used as the curation's @id.
    :param base_url: request base URL; used to mint annotation/range ids.
    """
    q_hash = query_hash(query_url)
    cfg = Cfg()
    curation_link_prefix = cfg.cfg['curation_link_prefix']
    # an empty prefix disables prefixing of backlink URIs
    if len(curation_link_prefix) > 0:
        use_prefix = True
    else:
        use_prefix = False
    marker_settings = cfg.cfg['marker_settings']
    # normalize: no trailing slash on the base URL
    if base_url[-1] == '/':
        base_url = base_url[:-1]
    cur = OrderedDict()
    cur['@context'] = ['http://iiif.io/api/presentation/2/context.json',
                       ('http://codh.rois.ac.jp/iiif/curation/1/context.js'
                        'on')]
    cur['@type'] = 'cr:Curation'
    cur['@id'] = query_url
    cur['viewingHint'] = 'annotation'
    cur['label'] = 'Tracing Curations for {}'.format(canvas_uri)
    cur['selections'] = []
    sel = OrderedDict()
    sel['@id'] = '{}/trace/{}/range/{}'.format(base_url, q_hash, uuid.uuid1())
    sel['@type'] = 'sc:Range'
    sel['label'] = 'Temporary range for displaying a canvas'
    sel['members'] = []
    mem = OrderedDict()
    mem['@id'] = canvas_uri
    mem['@type'] = 'sc:Canvas'
    mem['label'] = 'Temporary canvas for displaying annotations'
    mem['metadata'] = []
    for xywh, uris in backlinks.items():
        # For every area
        mtd = OrderedDict()
        mtd['label'] = 'Annotation'
        mtd['value'] = []
        # Create a single annotation
        ann = OrderedDict()
        ann['@id'] = '{}/trace/{}/annotation/{}'.format(
            base_url, q_hash, uuid.uuid1()
        )
        ann['@type'] = 'oa:Annotation'
        ann['motivation'] = 'sc:painting'
        ann['on'] = '{}#xywh={}'.format(canvas_uri, xywh)
        ann['resource'] = OrderedDict()
        ann['resource']['@type'] = 'cnt:ContentAsText'
        ann['resource']['format'] = 'text/html'
        # With a list of all backlinks to curations
        backlink_list_chars = ''
        for i, uri in enumerate(uris):
            if i > 0:
                backlink_list_chars += ',<br>'
            backlink_uri = uri
            if use_prefix:
                backlink_uri = '{}{}'.format(
                    curation_link_prefix, uri
                )
            backlink_list_chars += '<a href="{}">Curation {}</a>'.format(
                backlink_uri, i+1
            )
        ann['resource']['chars'] = backlink_list_chars
        # copy the configured marker display settings onto the annotation
        ann['resource']['marker'] = OrderedDict()
        for key, val in marker_settings.items():
            ann['resource']['marker'][key] = val
        mtd['value'].append(copy.deepcopy(ann))
        mem['metadata'].append(copy.deepcopy(mtd))
    sel['members'].append(copy.deepcopy(mem))
    sel['within'] = OrderedDict()
    sel['within']['@id'] = containing_manifest_uri
    sel['within']['@type'] = 'sc:Manifest'
    sel['within']['label'] = 'Temporary manifest for displaying a canvas'
    cur['selections'].append(copy.deepcopy(sel))
    return cur
from DataProcessing import *
from config import Cfg

# Smoke-test script: build one training batch from a single tracked video
# sequence and print it.
videoPath = '/home/thomas/DeepLearning/RAN/train/ILSVRC2015_VID_train_0000/ILSVRC2015_train_00011000'
vid, w, h = ProcessVideo(videoPath)
dataProcessor = DataProcessor(w, h, Cfg())
trainingData = dataProcessor.GetTrainingData(vid.TrackedObjects)
trainingBatch = MakeBatch(trainingData)
print(trainingBatch)
from starlette.responses import JSONResponse, HTMLResponse
from starlette.authentication import (AuthenticationBackend,
                                      AuthenticationError, SimpleUser,
                                      UnauthenticatedUser, AuthCredentials)
from starlette.routing import Route
import base64
import binascii
import os
from config import Cfg, constants
from utils.processor import DatabaseServicer
from utils.exceptions import *
import redis

# module-level shared state: redis connection and stage-specific config
r = redis.from_url(os.environ['REDIS_URL'])
CONF = Cfg(os.environ.get(constants.STAGE))


class BasicAuthBackend(AuthenticationBackend):
    async def authenticate(self, request):
        """ This method validates the authenticity of the request based on
        the authorization header.

        Authorization Header: a base64 encoded string of the Admin's
        username (e.g: token c2hla2hhcg== )
        """
        # no header -> unauthenticated (returning None, not an error)
        if "Authorization" not in request.headers:
            return
        auth = request.headers["Authorization"]
        auth = auth.strip()
        try:
            secret = base64.b64decode(auth).decode("utf-8")
            # NOTE(review): snippet truncated here -- credential validation
            # and the except clause are not visible in this chunk.
def __init__(self, data):
    """Store the raw samples and load the pre-trained embedding matrix.

    :param data: dataset samples kept as-is on the instance.
    """
    super(MyTrainValData, self).__init__()
    self.data = data
    # FIX: use a `with` block -- the original's open() inside pickle.load()
    # leaked the file handle.
    emb_path = os.path.join(Cfg().word2vec_from_scratch, 'emb_weights.pkl')
    with open(emb_path, 'rb') as fp:
        self.embeddings = pickle.load(fp)
# Python 2 script fragment (duplicate of an earlier chunk): consume the
# command-line options parsed earlier (opts / configFile / csvFiles / uiType
# are defined before this chunk).
for o, a in opts:
    if o == '-c' or o == '--cfg':
        configFile = a
    elif o == '--csv':
        csvFiles.append(a)
    elif o == '--ui':
        uiType = a
    else:
        raise Exception('unknown option "%s"' % o)

# at least a config file or one CSV source is required
if not (configFile) and not (csvFiles):
    Usage()
    sys.exit(1)

if configFile:
    Cfg.loadFile(configFile)

# set up basic objects
store = DataStore()
sourceMgr = SourceManager(store)

# start GUI -- backend modules are imported lazily so only the chosen
# toolkit needs to be installed
if uiType == 'sdl':
    from sdl_output import SdlOutput
    widget = SdlOutput(None, store, sourceMgr)
elif uiType == 'qt':
    from qt_output import QtOutput
    widget = QtOutput(store, sourceMgr)
else:
    # Python 2 print statement
    print "invalid UI type '%s'" % uiType
    Usage()