def init(config):
    batchsize = config['train']['batchsize']
    current_path = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(current_path)

    import ref as ds
    ds.init()
    # pick only the samples that have keypoints as the dataset
    train, valid = ds.setup_val_split()

    dataset = {
        key: Dataset(config, ds, data)
        for key, data in zip(['train', 'valid'], [train, valid])
    }

    use_data_loader = config['train']['use_data_loader']

    loaders = {}
    for key in dataset:
        loaders[key] = torch.utils.data.DataLoader(
            dataset[key], batch_size=batchsize, shuffle=True,
            num_workers=config['train']['num_workers'], pin_memory=False)

    def gen(phase):
        batchsize = config['train']['batchsize']
        batchnum = config['train']['{}_iters'.format(phase)]
        loader = loaders[phase].__iter__()
        for i in range(batchnum):
            imgs, masks, keypoints, heatmaps = next(loader)
            yield {
                'imgs': imgs,
                'masks': masks,
                'heatmaps': heatmaps,
                'keypoints': keypoints
            }

    return lambda key: gen(key)
def setUp(self):
    self.tempdir = tempfile.mkdtemp()
    base_dir = os.path.join(self.tempdir, 'ref')
    shutil.copytree('data/ref/', base_dir)
    ref.init(base_dir)
    self.documents = {
        1: {'author': 'Paterek',
            'bibtex': '@inproceedings{paterek2007improving,\n title={Improving regularized singular value decomposition for collaborative filtering},\n author={Paterek, A.},\n booktitle={Proceedings of KDD Cup and Workshop},\n volume={2007},\n pages={5--8},\n year={2007}\n}\n',
            'docid': 1,
            'filename': 'Paterek - 2007 - Improving regularized singular value decomposition for collaborative filtering - 1.pdf',
            'journal': 'Proceedings of KDD Cup and Workshop',
            'notes': '',
            'rating': 'U',
            'tags': '',
            'title': 'Improving regularized singular value decomposition for collaborative filtering',
            'year': 2007},
        2: {'author': 'Yu, Lo, Hsieh, Lou, McKenzie, Chou, Chung, Ho, Chang, Wei, others',
            'bibtex': '@inproceedings{yu2010feature,\n title={Feature engineering and classifier ensemble for KDD cup 2010},\n author={Yu, H.F. and Lo, H.Y. and Hsieh, H.P. and Lou, J.K. and McKenzie, T.G. and Chou, J.W. and Chung, P.H. and Ho, C.H. and Chang, C.F. and Wei, Y.H. and others},\n booktitle={Proceedings of the KDD Cup 2010 Workshop},\n pages={1--16},\n year={2010}\n}\n',
            'docid': 2,
            'filename': 'Yu et al - 2010 - Feature engineering and classifier ensemble for KDD cup 2010 - 2.pdf',
            'journal': 'Proceedings of the KDD Cup 2010 Workshop',
            'notes': '',
            'rating': 'U',
            'tags': '',
            'title': 'Feature engineering and classifier ensemble for KDD cup 2010',
            'year': 2010}}
def setUpClass(cls):
    cls.tempdir = tempfile.mkdtemp()
    base_dir = os.path.join(cls.tempdir, 'ref')
    ref.init(base_dir)

    # generate random words
    def r(n, m, words=re.sub(r'\W+', ' ', open('data/kdd08koren.txt').read()).split()):
        return ' '.join(choice(words) for i in range(randint(n, m)))

    all_tags = [r(1, 2) for i in range(100)]
    ref.con.execute('BEGIN')
    for i in range(1000):
        title = r(5, 10)
        author = ' and '.join(r(1, 2) for _ in range(randint(1, 5)))
        year = str(randint(1800, 2000))
        journal = r(1, 5)
        rating = str(randint(1, 10))
        q = random()
        if q < 0.2:
            fulltext = r(50000, 200000)
        elif q < 0.8:
            fulltext = r(1000, 15000)
        else:
            fulltext = ''
        notes = textwrap.fill(r(0, 100))
        tags = '; '.join(sample(all_tags, randint(0, 3)))
        o = '\n '.join(r(1, 1) + '=' + r(1, 5) for i in range(randint(0, 6)))
        bibtex = '''@book{{foo\n title={},\n author={},\n year={},\n journal={},\n {}}}\n'''.format(
            title, author, year, journal, o)
        if random() < 0.1:
            title = author = year = journal = bibtex = ''
        c = ref.con.execute('INSERT INTO fulltext VALUES (?)', (fulltext,))
        lastrowid = c.lastrowid
        doc = {'author': author, 'year': year, 'title': title,
               'docid': lastrowid, 'filename': ''}
        filename = ref.get_filename(doc)
        c = ref.con.execute(
            'INSERT INTO documents VALUES (?,?,?,?,?,?,?,?,?,?)',
            (None, tags, title, author, year, rating, journal,
             filename, notes, bibtex))
        assert lastrowid == c.lastrowid
    ref.con.execute('COMMIT')
def init(config):
    batchsize = config['train']['batchsize']
    current_path = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(current_path)

    import ref as ds
    ds.init()
    train, valid = ds.setup_val_split()

    dataset = {
        key: Dataset(config, ds, data)
        for key, data in zip(['train', 'valid'], [train, valid])
    }

    use_data_loader = config['train']['use_data_loader']

    loaders = {}
    for key in dataset:
        loaders[key] = torch.utils.data.DataLoader(
            dataset[key], batch_size=batchsize, shuffle=True,
            num_workers=config['train']['num_workers'], pin_memory=False)

    def gen(phase):
        batchsize = config['train']['batchsize']
        batchnum = config['train']['{}_iters'.format(phase)]
        loader = loaders[phase].__iter__()
        for i in range(batchnum):
            try:
                imgs, heatmaps = next(loader)
            except StopIteration:
                # restart the iterator so the dataloader keeps providing data
                loader = loaders[phase].__iter__()
                imgs, heatmaps = next(loader)
            yield {
                'imgs': imgs,          # cropped and augmented
                'heatmaps': heatmaps,  # based on keypoints; 0 if the joint is not in the image
            }

    return lambda key: gen(key)
def init(opts):
    batchsize = opts.batchsize
    current_path = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(current_path)

    import ref as ds
    ds.init()
    train, valid = ds.setup_val_split()

    dataset = {
        key: Dataset(opts, ds, data)
        for key, data in zip(['train', 'valid'], [train, valid])
    }

    loaders = {}
    for key in dataset:
        loaders[key] = torch.utils.data.DataLoader(
            dataset[key], batch_size=batchsize, shuffle=True,
            num_workers=opts.num_workers, pin_memory=False)

    def gen(phase):
        batchsize = opts.batchsize
        if phase == 'train':
            batchnum = opts.train_iters
        else:
            batchnum = opts.valid_iters
        loader = loaders[phase].__iter__()
        for i in range(batchnum):
            imgs, masks, keypoints, heatmaps = next(loader)
            yield {
                'imgs': imgs,
                'masks': masks,
                'heatmaps': heatmaps,
                'keypoints': keypoints
            }

    return lambda key: gen(key)
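# --- Usage sketch (illustrative, not from the original code) ----------------
# A minimal example of how the provider returned by init() might be consumed.
# The `opts` values below are hypothetical placeholders; the real fields come
# from the project's argument parser.
from types import SimpleNamespace

opts = SimpleNamespace(batchsize=16, num_workers=2, train_iters=1000, valid_iters=10)
data_func = init(opts)             # factory: phase name -> batch generator
for batch in data_func('train'):   # each batch is a dict of tensors
    imgs, heatmaps = batch['imgs'], batch['heatmaps']
    # a training step would consume imgs / masks / keypoints / heatmaps here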
def toggle_unread():
    for i, line in enumerate(info_buf):
        if line.startswith('rating='):
            info_buf[i] = 'rating=' + ('' if info_buf[i].endswith('U') else 'U')
    save_info(parse_info())


def order_documents(o):
    global order
    order = o
    last_select_cmd()


ref.init()
order = 'docid DESC'
headers = 'docid', 'rating', 'author', 'title', 'year'
tags = ref.get_tags()
col_size = {}

c = vim.command
c('set buftype=nofile')
c('set bufhidden=hide')
c('setlocal noswapfile')
c('file main')
main_buf, main_win = vim.current.buffer, vim.current.window
c('below new info')
c('set buftype=nofile')
c('set bufhidden=hide')
def main():
    ref.init()
    with open(ref.ref_dir + '/valid_id', 'r') as f:
        valid_id = list(map(lambda x: int(x.strip()), f.readlines()))
    pickle.dump(build(valid_id, ref),
                open(ref.ref_dir + '/validation.pkl', 'wb'))
""" Use imagemagick to convert all pfds to a sequence of thumbnail images requires: sudo apt-get install imagemagick BASED ON ANDREJ KARPATHY'S SCRIPT https://github.com/karpathy/arxiv-sanity-preserver/blob/master/thumb_pdf.py but added in my personal `ref` stuff https://github.com/jzbontar/ref """ import os import time import shutil import subprocess import ref; ref.init() publist = """ chen2018big, 1807.03848, false mroueh2018regularized, 1805.12062, false dognin2018improved, 1805.00063, false sercu2017semi, 1712.02505, true mroueh2017sobolev, 1711.04894, false saon2017english, 1703.02136, false sercu2017network, , false mroueh2017fisher, 1705.09675, true mroueh2017mcgan, 1702.08398, false sercu2016dense, 1611.09288, true sercu2016advances, 1604.01792, false saon2016ibm, 1604.08242, false sercu2015very, 1509.08967, true """ publist = [ [x.strip() for x in entry.split(',')] for entry in publist.strip().split('\n')]