Example #1
import os
import sys

import torch

# `Dataset` is assumed to be defined elsewhere in the surrounding module.
def init(config):
    batchsize = config['train']['batchsize']
    # make the local `ref` module importable regardless of the working directory
    current_path = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(current_path)
    import ref as ds
    ds.init()

    train, valid = ds.setup_val_split()  # keep only samples that have keypoint annotations
    dataset = {
        key: Dataset(config, ds, data)
        for key, data in zip(['train', 'valid'], [train, valid])
    }

    use_data_loader = config['train']['use_data_loader']  # read but unused in this snippet

    loaders = {}
    for key in dataset:
        loaders[key] = torch.utils.data.DataLoader(
            dataset[key],
            batch_size=batchsize,
            shuffle=True,
            num_workers=config['train']['num_workers'],
            pin_memory=False)

    def gen(phase):
        batchsize = config['train']['batchsize']
        batchnum = config['train']['{}_iters'.format(phase)]
        loader = iter(loaders[phase])
        for i in range(batchnum):
            imgs, masks, keypoints, heatmaps = next(loader)
            yield {
                'imgs': imgs,
                'masks': masks,
                'heatmaps': heatmaps,
                'keypoints': keypoints
            }


    return gen  # call with 'train' or 'valid' to obtain a batch generator
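For orientation, init returns gen itself, a factory of batch generators. A minimal usage sketch, with illustrative values for the config keys the function reads (the values are assumptions, not from the repo):

config = {'train': {'batchsize': 16, 'num_workers': 4, 'use_data_loader': True,
                    'train_iters': 1000, 'valid_iters': 100}}  # illustrative values
data_func = init(config)
for batch in data_func('train'):
    imgs, masks = batch['imgs'], batch['masks']
    heatmaps, keypoints = batch['heatmaps'], batch['keypoints']
    # ... run one optimization step here ...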
Example #2
    def setUp(self):
        # copy the checked-in fixture database into a temp dir and point ref at it
        self.tempdir = tempfile.mkdtemp()
        base_dir = os.path.join(self.tempdir, 'ref')
        shutil.copytree('data/ref/', base_dir)
        ref.init(base_dir)

        self.documents = {
            1: {'author': 'Paterek',
                'bibtex': '@inproceedings{paterek2007improving,\n  title={Improving regularized singular value decomposition for collaborative filtering},\n  author={Paterek, A.},\n  booktitle={Proceedings of KDD Cup and Workshop},\n  volume={2007},\n  pages={5--8},\n  year={2007}\n}\n',
                'docid': 1,
                'filename': 'Paterek - 2007 - Improving regularized singular value decomposition for collaborative filtering - 1.pdf',
                'journal': 'Proceedings of KDD Cup and Workshop',
                'notes': '',
                'rating': 'U',
                'tags': '',
                'title': 'Improving regularized singular value decomposition for collaborative filtering',
                'year': 2007},
            2: {'author': 'Yu, Lo, Hsieh, Lou, McKenzie, Chou, Chung, Ho, Chang, Wei, others',
                'bibtex': '@inproceedings{yu2010feature,\n  title={Feature engineering and classifier ensemble for KDD cup 2010},\n  author={Yu, H.F. and Lo, H.Y. and Hsieh, H.P. and Lou, J.K. and McKenzie, T.G. and Chou, J.W. and Chung, P.H. and Ho, C.H. and Chang, C.F. and Wei, Y.H. and others},\n  booktitle={Proceedings of the KDD Cup 2010 Workshop},\n  pages={1--16},\n  year={2010}\n}\n',
                'docid': 2,
                'filename': 'Yu et al - 2010 - Feature engineering and classifier ensemble for KDD cup 2010 - 2.pdf',
                'journal': 'Proceedings of the KDD Cup 2010 Workshop',
                'notes': '',
                'rating': 'U',
                'tags': '',
                'title': 'Feature engineering and classifier ensemble for KDD cup 2010',
                'year': 2010}}
Example #3
    @classmethod
    def setUpClass(cls):
        # point ref at a fresh temp dir and fill it with 1000 random documents
        cls.tempdir = tempfile.mkdtemp()
        base_dir = os.path.join(cls.tempdir, 'ref')
        ref.init(base_dir)

        # helper: a random string of n..m words drawn from a sample document
        # (the word list is loaded once, at function definition time)
        def r(n, m, words=re.sub(r'\W+', ' ',
                                 open('data/kdd08koren.txt').read()).split()):
            return ' '.join(choice(words) for i in range(randint(n, m)))

        all_tags = [r(1, 2) for i in range(100)]
        ref.con.execute('BEGIN')  # a single transaction keeps the 1000 inserts fast
        for i in range(1000):
            title = r(5, 10)
            author = ' and '.join(r(1, 2) for _ in range(randint(1, 5)))
            year = str(randint(1800, 2000))
            journal = r(1, 5)
            rating = str(randint(1, 10))
            q = random()
            if q < 0.2:                     # 20%: long fulltext
                fulltext = r(50000, 200000)
            elif q < 0.8:                   # 60%: medium fulltext
                fulltext = r(1000, 15000)
            else:                           # 20%: no fulltext
                fulltext = ''
            notes = textwrap.fill(r(0, 100))
            tags = '; '.join(sample(all_tags, randint(0, 3)))
            o = '\n  '.join(
                r(1, 1) + '=' + r(1, 5) for i in range(randint(0, 6)))  # random extra bibtex fields
            bibtex = '''@book{{foo\n title={},\n author={},\n year={},\n journal={},\n {}}}\n'''.format(
                title, author, year, journal, o)
            if random() < 0.1:  # 10% of documents get entirely blank metadata
                title = author = year = journal = bibtex = ''

            c = ref.con.execute('INSERT INTO fulltext VALUES (?)',
                                (fulltext, ))
            lastrowid = c.lastrowid
            doc = {
                'author': author,
                'year': year,
                'title': title,
                'docid': lastrowid,
                'filename': ''
            }
            filename = ref.get_filename(doc)
            c = ref.con.execute(
                'INSERT INTO documents VALUES (?,?,?,?,?,?,?,?,?,?)',
                (None, tags, title, author, year, rating, journal, filename,
                 notes, bibtex))
            assert lastrowid == c.lastrowid  # fulltext and documents rows must share rowids
        ref.con.execute('COMMIT')
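The ? placeholders indicate ref.con is a DB-API (sqlite3-style) connection. A quick sanity check of the generated fixture, assuming nothing beyond the insert order above:

rows = ref.con.execute('SELECT * FROM documents LIMIT 5').fetchall()
for row in rows:
    # column order matches the INSERT above:
    # (docid, tags, title, author, year, rating, journal, filename, notes, bibtex)
    print(row)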
Example #4
def init(config):
    batchsize = config['train']['batchsize']
    current_path = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(current_path)
    import ref as ds
    ds.init()

    train, valid = ds.setup_val_split()
    dataset = {
        key: Dataset(config, ds, data)
        for key, data in zip(['train', 'valid'], [train, valid])
    }

    use_data_loader = config['train']['use_data_loader']

    loaders = {}
    for key in dataset:
        loaders[key] = torch.utils.data.DataLoader(
            dataset[key],
            batch_size=batchsize,
            shuffle=True,
            num_workers=config['train']['num_workers'],
            pin_memory=False)

    def gen(phase):
        batchsize = config['train']['batchsize']
        batchnum = config['train']['{}_iters'.format(phase)]
        loader = iter(loaders[phase])
        for i in range(batchnum):
            try:
                imgs, heatmaps = next(loader)
            except StopIteration:
                # the DataLoader is exhausted (epoch ended); restart it so the
                # generator can yield batchnum batches regardless of epoch size
                loader = iter(loaders[phase])
                imgs, heatmaps = next(loader)
            yield {
                'imgs': imgs,          # cropped and augmented
                'heatmaps': heatmaps,  # built from the keypoints; zero where a joint is outside the image
            }

    return gen
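The try/except above is the standard pattern for drawing more batches than one DataLoader epoch contains. A generic version of the same restart trick (my generalization, not code from this repo):

def endless(loader):
    # rebuild the iterator whenever the dataset is exhausted, so callers
    # can pull an arbitrary number of batches from a finite DataLoader
    while True:
        for batch in loader:
            yield batch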
Example #5
def init(opts):
    # same data provider as Example #1, but configured by an argparse-style
    # `opts` object instead of a nested config dict
    batchsize = opts.batchsize
    current_path = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(current_path)
    import ref as ds
    ds.init()

    train, valid = ds.setup_val_split()
    dataset = {
        key: Dataset(opts, ds, data)
        for key, data in zip(['train', 'valid'], [train, valid])
    }

    loaders = {}
    for key in dataset:
        loaders[key] = torch.utils.data.DataLoader(
            dataset[key],
            batch_size=batchsize,
            shuffle=True,
            num_workers=opts.num_workers,
            pin_memory=False)

    def gen(phase):
        batchsize = opts.batchsize
        if phase == 'train':
            batchnum = opts.train_iters
        else:
            batchnum = opts.valid_iters
        loader = iter(loaders[phase])
        for i in range(batchnum):
            imgs, masks, keypoints, heatmaps = next(loader)
            yield {
                'imgs': imgs,
                'masks': masks,
                'heatmaps': heatmaps,
                'keypoints': keypoints
            }

    return gen
Example #6
def toggle_unread():
    # flip the trailing 'U' (unread) on the rating= line of the info buffer
    for i, line in enumerate(info_buf):
        if line.startswith('rating='):
            info_buf[i] = 'rating=' + ('' if line.endswith('U') else 'U')
    save_info(parse_info())


def order_documents(o):
    # change the sort order of the document listing and redraw it
    global order

    order = o
    last_select_cmd()


ref.init()

order = 'docid DESC'
headers = 'docid', 'rating', 'author', 'title', 'year'
tags = ref.get_tags()
col_size = {}

c = vim.command
# the main listing buffer: a scratch buffer not backed by any file
c('set buftype=nofile')
c('set bufhidden=hide')
c('setlocal noswapfile')
c('file main')
main_buf, main_win = vim.current.buffer, vim.current.window
# a second scratch window below it for the selected document's details
c('below new info')
c('set buftype=nofile')
c('set bufhidden=hide')
Example #7
def main():
    ref.init()
    with open(ref.ref_dir + '/valid_id', 'r') as f:
        valid_id = [int(line.strip()) for line in f]
    # build() is defined elsewhere in this module
    with open(ref.ref_dir + '/validation.pkl', 'wb') as f:
        pickle.dump(build(valid_id, ref), f)
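A consumer loads the pickle back the same way; what build() returns is not shown here, so this only demonstrates the round trip:

with open(ref.ref_dir + '/validation.pkl', 'rb') as f:
    validation = pickle.load(f)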
Example #8
"""
Use imagemagick to convert all pfds to a sequence of thumbnail images
requires: sudo apt-get install imagemagick
BASED ON ANDREJ KARPATHY'S SCRIPT https://github.com/karpathy/arxiv-sanity-preserver/blob/master/thumb_pdf.py
but added in my personal `ref` stuff https://github.com/jzbontar/ref
"""

import os
import time
import shutil
import subprocess

import ref
ref.init()

publist = """
chen2018big, 1807.03848, false
mroueh2018regularized, 1805.12062, false
dognin2018improved, 1805.00063, false
sercu2017semi, 1712.02505, true
mroueh2017sobolev, 1711.04894, false
saon2017english, 1703.02136, false
sercu2017network, , false
mroueh2017fisher, 1705.09675, true
mroueh2017mcgan, 1702.08398, false
sercu2016dense, 1611.09288, true
sercu2016advances, 1604.01792, false
saon2016ibm, 1604.08242, false
sercu2015very, 1509.08967, true
"""
publist = [[x.strip() for x in entry.split(',')]
           for entry in publist.strip().split('\n')]
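Each entry appears to be a bibtex key, an arXiv id, and a flag (my reading; the script does not name the fields). A minimal sketch of the thumbnailing step the docstring describes, with hypothetical pdf/ and thumbs/ paths:

for key, arxiv_id, flag in publist:
    pdf_path = os.path.join('pdf', key + '.pdf')       # hypothetical input location
    thumb_path = os.path.join('thumbs', key + '.png')  # hypothetical output location
    if not os.path.exists(pdf_path):
        continue
    # ImageMagick: render page 1 of the pdf as a 156px-tall thumbnail
    subprocess.call(['convert', pdf_path + '[0]', '-thumbnail', 'x156', thumb_path])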