Example #1
0
def debug_channel(pname, cname, parse_refs=True, entry_url=None):
    """Run the generic parser's debug pass over one support channel.

    Loads the stored model for channel ``cname`` of project ``pname``,
    looks up the matching ``SupportChannel`` record and hands both to
    ``generic_parser.debug_channel`` together with a CLI progress monitor.

    :param pname: directory name of the project.
    :param cname: directory name of the channel.
    :param parse_refs: forwarded unchanged to the parser.
    :param entry_url: forwarded unchanged to the parser.
    :returns: the ``SupportChannel`` that was looked up.
    """
    channel_model = load_model(pname, STHREAD_PATH, cname)
    project_channels = SupportChannel.objects.filter(project__dir_name=pname)
    support_channel = project_channels.get(dir_name=cname)
    monitor = CLIProgressMonitor()
    generic_parser.debug_channel(
        support_channel,
        channel_model,
        progress_monitor=monitor,
        parse_refs=parse_refs,
        entry_url=entry_url)
    return support_channel
Example #2
0
def sync_doc(pname, dname, release):
    """Synchronize one document release and store the resulting pages.

    The model is stored under the key ``dname + release``. The class named
    by ``model.syncer`` is instantiated with the model's input URL and the
    document path; the result of its ``sync()`` call is assigned to
    ``model.pages`` and the model is dumped back under the same key.

    :param pname: project name passed to the path and model helpers.
    :param dname: document name.
    :param release: release string appended to ``dname`` to form the key.
    """
    key = dname + release
    target_path = get_doc_path(pname, dname, release)
    doc_model = load_model(pname, DOC_PATH, key)
    syncer_class = import_clazz(doc_model.syncer)
    doc_syncer = syncer_class(doc_model.input_url, target_path)
    doc_model.pages = doc_syncer.sync()
    dump_model(doc_model, pname, DOC_PATH, key)
Example #3
0
def toc_refresh(pname, cname):
    """Refresh the table of contents of a channel model and save it.

    The syncer class named by ``model.syncer_clazz`` is instantiated without
    arguments and asked to refresh the model, which is then dumped back.
    Any ``Exception`` raised by those steps is logged with its traceback
    and not re-raised, so callers never see a failure from this function
    other than one raised while loading the model.
    """
    toc_model = load_model(pname, STHREAD_PATH, cname)
    try:
        syncer_class = import_clazz(toc_model.syncer_clazz)
        syncer_class().toc_refresh(toc_model)
        dump_model(toc_model, pname, STHREAD_PATH, cname)
    except Exception:
        logger.exception('Error while refreshing toc')
Example #4
0
def toc_refresh(pname, cname):
    """Rebuild the stored table of contents for one channel.

    Loads the channel model, lets an instance of the class named by
    ``model.syncer_clazz`` refresh it and writes the model back. Errors
    raised after the model has been loaded are logged (with traceback) and
    swallowed.
    """
    stored_model = load_model(pname, STHREAD_PATH, cname)
    try:
        refresher = import_clazz(stored_model.syncer_clazz)()
        refresher.toc_refresh(stored_model)
        dump_model(stored_model, pname, STHREAD_PATH, cname)
    except Exception:
        logger.exception('Error while refreshing toc')
Example #5
0
def sync_doc(pname, dname, release):
    """Run the configured syncer for a document release and save its pages.

    The model key is ``dname + release``. The syncer class comes from
    ``model.syncer`` and is built from ``model.input_url`` and the path
    returned by ``get_doc_path``; whatever ``sync()`` returns becomes
    ``model.pages`` before the model is dumped.
    """
    model_key = dname + release
    local_path = get_doc_path(pname, dname, release)
    stored_model = load_model(pname, DOC_PATH, model_key)
    make_syncer = import_clazz(stored_model.syncer)
    stored_model.pages = make_syncer(stored_model.input_url, local_path).sync()
    dump_model(stored_model, pname, DOC_PATH, model_key)
Example #6
0
    def test_phpbb_syncer(self):
        """Run the phpBB forum syncer end to end against forum.hibernate.org.

        Checks the stored model after each of three steps: refreshing the
        table of contents, downloading sections 0-3 and downloading the
        entry with index 1024.

        NOTE(review): the expected counts presumably reflect the live forum
        at the time of writing -- confirm before relying on this test.
        """
        pname = 'project1'
        cname = 'coreforum'
        syncer = 'channel.syncer.common_syncers.PHPBBForumSyncer'
        forum_url = 'https://forum.hibernate.org/viewforum.php?f=1'
        create_channel_db(pname, 'cf', cname, syncer, 'foo.parser', forum_url)
        create_channel_local(pname, cname, syncer, forum_url)

        # Step 1: refresh the toc; nothing is downloaded yet.
        toc_refresh(pname, cname)
        model = load_model(pname, STHREAD_PATH, cname)
        self.assertEqual(
                'https://forum.hibernate.org/viewforum.php?f=1&sd=a&start=0',
                model.toc_sections[0].url)
        self.assertFalse(model.toc_sections[0].downloaded)
        self.assertTrue(len(model.toc_sections) > 2349)
        # range() instead of the Python-2-only xrange(): identical iteration,
        # but the test no longer raises NameError on Python 3.
        for i in range(0, 2349):
            self.assertEqual(i, model.toc_sections[i].index)

        # Step 2: download sections 0..3; section 4 must stay untouched.
        toc_download_section(pname, cname, start=0, end=4)
        model = load_model(pname, STHREAD_PATH, cname)
        for i in range(4):
            self.assertTrue(model.toc_sections[i].downloaded)
        self.assertFalse(model.toc_sections[4].downloaded)
        self.assertEqual(100, len(model.entries))
        self.assertEqual(0, model.entries[0].index)
        self.assertFalse(model.entries[0].downloaded)
        self.assertEqual(1000, model.entries[25].index)
        self.assertEqual(1001, model.entries[26].index)
        self.assertTrue(model.entries[26].url.find('t=59') > -1)

        # Step 3: download the single entry with index 1024, which the
        # assertions below expect at list position 49.
        toc_download_entries(pname, cname, 1024, 1025)
        model = load_model(pname, STHREAD_PATH, cname)
        self.assertTrue(model.entries[49].downloaded)
        self.assertFalse(model.entries[50].downloaded)
        for local_path in model.entries[49].local_paths[:2]:
            pass
        path = os.path.join(settings.PROJECT_FS_ROOT,
                model.entries[49].local_paths[0])
        self.assertTrue(os.path.exists(path))
        path = os.path.join(settings.PROJECT_FS_ROOT,
                model.entries[49].local_paths[1])
        self.assertTrue(os.path.exists(path))
Example #7
0
    def test_fudeclipse_syncer(self):
        """Run the FUD Eclipse forum syncer end to end against eclipse.org.

        Checks the stored model after each of three steps: refreshing the
        table of contents, downloading sections 0-3 and downloading the
        entry with index 1039.

        NOTE(review): the expected counts presumably reflect the live forum
        at the time of writing -- confirm before relying on this test.
        """
        pname = 'project1'
        cname = 'coreforum'
        syncer = 'channel.syncer.common_syncers.FUDEclipseForumSyncer'
        forum_url = 'http://www.eclipse.org/forums/index.php/sf/thread/13/'
        create_channel_db(pname, 'cf', cname, syncer, 'foo.parser', forum_url)
        create_channel_local(pname, cname, syncer, forum_url)

        # Step 1: refresh the toc; nothing is downloaded yet.
        toc_refresh(pname, cname)
        model = load_model(pname, STHREAD_PATH, cname)
        self.assertEqual(
                'http://www.eclipse.org/forums/index.php/sf/thread/13/1/0/',
                model.toc_sections[0].url)
        self.assertFalse(model.toc_sections[0].downloaded)

        self.assertTrue(len(model.toc_sections) >= 247)
        # range() instead of the Python-2-only xrange(): identical iteration,
        # but the test no longer raises NameError on Python 3.
        for i in range(0, 247):
            self.assertEqual(i, model.toc_sections[i].index)

        # Step 2: download sections 0..3; section 4 must stay untouched.
        toc_download_section(pname, cname, start=0, end=4)
        model = load_model(pname, STHREAD_PATH, cname)
        for i in range(4):
            self.assertTrue(model.toc_sections[i].downloaded)
        self.assertFalse(model.toc_sections[4].downloaded)

        self.assertEqual(160, len(model.entries))
        self.assertEqual(0, model.entries[0].index)
        self.assertFalse(model.entries[0].downloaded)
        self.assertEqual(1000, model.entries[40].index)
        self.assertEqual(1001, model.entries[41].index)
        # No entry URL is asserted for this syncer.

        # Step 3: download the single entry with index 1039, which the
        # assertions below expect at list position 79.
        toc_download_entries(pname, cname, 1039, 1040)
        model = load_model(pname, STHREAD_PATH, cname)
        self.assertTrue(model.entries[79].downloaded)
        self.assertFalse(model.entries[80].downloaded)
        path = os.path.join(settings.PROJECT_FS_ROOT,
                model.entries[79].local_paths[0])
        self.assertTrue(os.path.exists(path))
Example #8
0
def parse_channel(pname, cname, parse_refs=True):
    """Parse one support channel and save its updated model.

    Loads the stored model for channel ``cname`` of project ``pname``,
    looks up the matching ``SupportChannel`` record, runs
    ``generic_parser.parse_channel`` with a CLI progress monitor and then
    dumps the model back.

    :param pname: directory name of the project.
    :param cname: directory name of the channel.
    :param parse_refs: forwarded unchanged to the parser.
    :returns: the ``SupportChannel`` that was looked up.
    """
    channel_model = load_model(pname, STHREAD_PATH, cname)
    project_channels = SupportChannel.objects.filter(project__dir_name=pname)
    support_channel = project_channels.get(dir_name=cname)
    monitor = CLIProgressMonitor()
    generic_parser.parse_channel(
        support_channel,
        channel_model,
        progress_monitor=monitor,
        parse_refs=parse_refs)
    dump_model(channel_model, pname, STHREAD_PATH, cname)
    return support_channel
Example #9
0
    def test_apache_syncer(self):
        """Run the Apache mail-archive syncer end to end.

        Uses the hc-httpclient-users archive on mail-archives.apache.org and
        checks the stored model after each of three steps: refreshing the
        table of contents, downloading sections 0-3 and downloading the
        entry with index 0.

        NOTE(review): the expected counts presumably reflect the live
        archive at the time of writing -- confirm before relying on this
        test.
        """
        pname = 'project1'
        cname = 'coreforum'
        syncer = 'channel.syncer.common_syncers.ApacheMailSyncer'
        archive_url = \
            'http://mail-archives.apache.org/mod_mbox/hc-httpclient-users/'
        create_channel_db(pname, 'cf', cname, syncer, 'foo.parser',
                archive_url)
        create_channel_local(pname, cname, syncer, archive_url)

        # Step 1: refresh the toc; nothing is downloaded yet.
        toc_refresh(pname, cname)
        model = load_model(pname, STHREAD_PATH, cname)
        self.assertEqual(
                'http://mail-archives.apache.org/mod_mbox/hc-httpclient-users/200410.mbox/date',
                model.toc_sections[0].url)
        self.assertFalse(model.toc_sections[0].downloaded)
        self.assertTrue(len(model.toc_sections) >= 79)
        # range() instead of the Python-2-only xrange(): identical iteration,
        # but the test no longer raises NameError on Python 3.
        for i in range(0, 79):
            self.assertEqual(i, model.toc_sections[i].index)

        # Step 2: download sections 0..3; section 4 must stay untouched.
        toc_download_section(pname, cname, start=0, end=4)
        model = load_model(pname, STHREAD_PATH, cname)
        for i in range(4):
            self.assertTrue(model.toc_sections[i].downloaded)
        self.assertFalse(model.toc_sections[4].downloaded)
        self.assertEqual(316, len(model.entries))
        self.assertEqual(0, model.entries[0].index)
        self.assertFalse(model.entries[0].downloaded)
        self.assertEqual(1000, model.entries[17].index)
        self.assertEqual(1001, model.entries[18].index)
        self.assertTrue(model.entries[18].url.find('xbox.localdomain') > -1)

        # Step 3: download only the entry with index 0.
        toc_download_entries(pname, cname, 0, 1)
        model = load_model(pname, STHREAD_PATH, cname)
        self.assertTrue(model.entries[0].downloaded)
        self.assertFalse(model.entries[1].downloaded)
        path = os.path.join(settings.PROJECT_FS_ROOT,
                model.entries[0].local_paths[0])
        self.assertTrue(os.path.exists(path))
Example #10
0
def parse_channel(pname, cname, parse_refs=True):
    """Run the generic parser over a channel, then persist the model.

    The channel model is loaded by project and channel name, the
    ``SupportChannel`` row is fetched by the same two directory names, and
    both are passed to ``generic_parser.parse_channel`` along with a fresh
    ``CLIProgressMonitor``. The model is dumped after parsing.

    :returns: the ``SupportChannel`` row that was parsed.
    """
    stored_model = load_model(pname, STHREAD_PATH, cname)
    in_project = SupportChannel.objects.filter(project__dir_name=pname)
    channel_row = in_project.get(dir_name=cname)
    cli_monitor = CLIProgressMonitor()
    parser_options = {
        'progress_monitor': cli_monitor,
        'parse_refs': parse_refs,
    }
    generic_parser.parse_channel(channel_row, stored_model, **parser_options)
    dump_model(stored_model, pname, STHREAD_PATH, cname)
    return channel_row
Example #11
0
def parse_doc(pname, dname, release, parse_refs=True):
    """Parse the stored pages of one document release.

    Takes the first ``ProjectRelease`` of project ``pname`` whose release
    equals ``release`` and the first ``Document`` of that release titled
    ``dname``, loads the model stored under ``dname + release`` and passes
    its pages to ``parse`` with a CLI progress monitor.

    :param parse_refs: forwarded unchanged to ``parse``.
    :returns: the ``Document`` that was parsed.
    :raises IndexError: when no matching release or document exists, since
        the first element of each query is taken by index.
    """
    releases = ProjectRelease.objects.filter(project__dir_name=pname)
    project_release = releases.filter(release=release)[0]
    documents = Document.objects.filter(project_release=project_release)
    doc = documents.filter(title=dname)[0]
    doc_model = load_model(pname, DOC_PATH, dname + release)
    monitor = CLIProgressMonitor()
    parse(doc, doc_model.pages, parse_refs, monitor)

    return doc
Example #12
0
def debug_channel(pname, cname, parse_refs=True, entry_url=None):
    """Invoke ``generic_parser.debug_channel`` for a single channel.

    The channel model is loaded by project and channel name and the
    ``SupportChannel`` row is fetched by the same two directory names.
    ``parse_refs`` and ``entry_url`` are passed through to the parser
    untouched, together with a fresh ``CLIProgressMonitor``.

    :returns: the ``SupportChannel`` row handed to the parser.
    """
    stored_model = load_model(pname, STHREAD_PATH, cname)
    in_project = SupportChannel.objects.filter(project__dir_name=pname)
    channel_row = in_project.get(dir_name=cname)
    cli_monitor = CLIProgressMonitor()
    parser_options = {
        'progress_monitor': cli_monitor,
        'parse_refs': parse_refs,
        'entry_url': entry_url,
    }
    generic_parser.debug_channel(channel_row, stored_model, **parser_options)
    return channel_row
Example #13
0
def parse_doc(pname, dname, release, parse_refs=True):
    """Feed the synced pages of a document release to ``parse``.

    The release row is the first ``ProjectRelease`` matching the project
    directory name and release string; the document row is the first
    ``Document`` of that release with title ``dname``. The pages come from
    the model stored under the key ``dname + release``.

    :returns: the ``Document`` row passed to ``parse``.
    :raises IndexError: when either query yields no rows, because the
        first row is taken by index.
    """
    matching_releases = ProjectRelease.objects.filter(
        project__dir_name=pname).filter(release=release)
    release_row = matching_releases[0]
    matching_docs = Document.objects.filter(
        project_release=release_row).filter(title=dname)
    document_row = matching_docs[0]
    model_key = dname + release
    stored_model = load_model(pname, DOC_PATH, model_key)
    cli_monitor = CLIProgressMonitor()
    parse(document_row, stored_model.pages, parse_refs, cli_monitor)

    return document_row
Example #14
0
 def test_sync_doc_remote(self):
     """Sync the HttpClient tutorial from hc.apache.org and check its pages.

     Expects nine pages in the stored model and, for every page key, that
     the path component of the key exists on the local file system.
     """
     pname = 'project1'
     release = '3.0'
     dname = 'manual'
     syncer = 'doc.syncer.generic_syncer.SingleURLSyncer'
     tutorial_url = (
         'http://hc.apache.org/httpcomponents-client-ga/tutorial/html/index.html')
     create_doc_local(pname, dname, release, syncer, tutorial_url)
     sync_doc(pname, dname, release)
     model = load_model(pname, DOC_PATH, dname + release)
     self.assertEqual(9, len(model.pages))
     for page_key in model.pages:
         self.assertTrue(os.path.exists(urlparse(page_key).path))
Example #15
0
def clear_channel_elements(pname, cname):
    """Reset the parse state of a channel and delete its parsed rows.

    First marks every entry of the stored channel model as not parsed and
    saves the model. Then deletes each message of the channel, removing the
    message's code references and code snippets before the message itself,
    and finally deletes the channel's support threads. The number of
    messages about to be deleted is printed.

    :param pname: directory name of the project.
    :param cname: directory name of the channel.
    """
    channel_model = load_model(pname, STHREAD_PATH, cname)
    for toc_entry in channel_model.entries:
        toc_entry.parsed = False
    dump_model(channel_model, pname, STHREAD_PATH, cname)

    project_channels = SupportChannel.objects.filter(project__dir_name=pname)
    support_channel = project_channels.get(dir_name=cname)
    messages = Message.objects.filter(sthread__channel=support_channel)
    print('Deleting {0} messages'.format(messages.count()))
    for msg in messages.all():
        # Related rows are removed explicitly before the message itself.
        msg.code_references.all().delete()
        msg.code_snippets.all().delete()
        msg.delete()
    threads = SupportThread.objects.filter(channel=support_channel)
    threads.delete()
Example #16
0
def toc_view_entries(pname, cname):
    """Print a download-status summary for the entries of a channel model.

    Reports the number of entries, how many are flagged as downloaded, and
    the index of the last entry in the unbroken run of downloaded entries
    at the start of the list (-1 when that run is empty).
    """
    entries = load_model(pname, STHREAD_PATH, cname).entries
    total = len(entries)
    fetched = len([e for e in entries if e.downloaded])
    last_index = -1
    for e in entries:
        if not e.downloaded:
            # Stop at the first gap; later downloaded entries are ignored.
            break
        last_index = e.index

    print('Table of Content Entries Status for {0}'.format(cname))
    print('Number of entries: {0}'.format(total))
    print('Number of downloaded entries: {0}'.format(fetched))
    print('Last downloaded entry index: {0}'.format(last_index))
Example #17
0
def clear_channel_elements(pname, cname):
    """Mark all channel entries unparsed and purge the channel's messages.

    The stored model is updated and dumped first. Afterwards every
    ``Message`` whose thread belongs to the channel is deleted together
    with its code references and code snippets, followed by all
    ``SupportThread`` rows of the channel. A count of the messages is
    printed before deletion starts.
    """
    stored_model = load_model(pname, STHREAD_PATH, cname)
    for item in stored_model.entries:
        item.parsed = False
    dump_model(stored_model, pname, STHREAD_PATH, cname)

    channel_row = SupportChannel.objects.filter(
        project__dir_name=pname).get(dir_name=cname)
    channel_messages = Message.objects.filter(sthread__channel=channel_row)
    message_count = channel_messages.count()
    print('Deleting {0} messages'.format(message_count))
    for current in channel_messages.all():
        current.code_references.all().delete()
        current.code_snippets.all().delete()
        current.delete()
    SupportThread.objects.filter(channel=channel_row).delete()
Example #18
0
 def test_sync_doc_remote(self):
     """Check that syncing the remote HttpClient tutorial stores nine pages.

     Each page key is parsed as a URL and its path component must exist on
     the local file system.
     """
     pname, dname, release = 'project1', 'manual', '3.0'
     create_doc_local(
         pname,
         dname,
         release,
         'doc.syncer.generic_syncer.SingleURLSyncer',
         'http://hc.apache.org/httpcomponents-client-ga/tutorial/html/index.html')
     sync_doc(pname, dname, release)
     stored_model = load_model(pname, DOC_PATH, dname + release)
     self.assertEqual(9, len(stored_model.pages))
     for key in stored_model.pages:
         local_path = urlparse(key).path
         self.assertTrue(os.path.exists(local_path))
Example #19
0
def toc_view(pname, cname):
    """Print a download-status summary for a channel's toc sections.

    Reports the number of sections, how many are flagged as downloaded, and
    the index of the last section in the unbroken run of downloaded
    sections at the start of the list (-1 when that run is empty).
    """
    sections = load_model(pname, STHREAD_PATH, cname).toc_sections
    total = len(sections)
    fetched = len([s for s in sections if s.downloaded])
    last_index = -1
    for s in sections:
        if not s.downloaded:
            # Stop at the first gap; later downloaded sections are ignored.
            break
        last_index = s.index

    print('Table of Content Status for {0}'.format(cname))
    print('Number of sections: {0}'.format(total))
    print('Number of downloaded sections: {0}'.format(fetched))
    print('Last downloaded section index: {0}'.format(last_index))
Example #20
0
def toc_view_entries(pname, cname):
    """Show how many entries of a channel model have been downloaded.

    Prints four lines: a header naming the channel, the total entry count,
    the count of entries flagged as downloaded, and the index of the last
    entry before the first one that is not downloaded (-1 if the very first
    entry is not downloaded or there are no entries).
    """
    stored_model = load_model(pname, STHREAD_PATH, cname)
    entry_count = len(stored_model.entries)

    downloaded_count = 0
    for item in stored_model.entries:
        if item.downloaded:
            downloaded_count += 1

    last_downloaded = -1
    for item in stored_model.entries:
        if not item.downloaded:
            break
        last_downloaded = item.index

    print('Table of Content Entries Status for {0}'.format(cname))
    print('Number of entries: {0}'.format(entry_count))
    print('Number of downloaded entries: {0}'.format(downloaded_count))
    print('Last downloaded entry index: {0}'.format(last_downloaded))
Example #21
0
def toc_view(pname, cname):
    """Show how many toc sections of a channel model have been downloaded.

    Prints four lines: a header naming the channel, the total section
    count, the count of sections flagged as downloaded, and the index of
    the last section before the first one that is not downloaded (-1 if the
    very first section is not downloaded or there are no sections).
    """
    stored_model = load_model(pname, STHREAD_PATH, cname)
    section_count = len(stored_model.toc_sections)

    downloaded_count = 0
    for item in stored_model.toc_sections:
        if item.downloaded:
            downloaded_count += 1

    last_downloaded = -1
    for item in stored_model.toc_sections:
        if not item.downloaded:
            break
        last_downloaded = item.index

    print('Table of Content Status for {0}'.format(cname))
    print('Number of sections: {0}'.format(section_count))
    print('Number of downloaded sections: {0}'.format(downloaded_count))
    print('Last downloaded section index: {0}'.format(last_downloaded))
Example #22
0
 def test_sync_doc_local(self):
     """Sync the bundled httpclient402doc test document via a file:// URL.

     Expects eight pages in the stored model and, for every page key, that
     the path component of the key exists on the local file system.
     """
     pname = 'project1'
     release = '3.0'
     dname = 'manual'
     index_page = os.path.normpath(
         os.path.join(settings.TESTDATA, 'httpclient402doc', 'index.html'))
     syncer = 'doc.syncer.generic_syncer.SingleURLSyncer'
     create_doc_local(pname, dname, release, syncer, 'file://' + index_page)
     sync_doc(pname, dname, release)
     model = load_model(pname, DOC_PATH, dname + release)
     self.assertEqual(8, len(model.pages))
     for page_key in model.pages:
         self.assertTrue(os.path.exists(urlparse(page_key).path))
Example #23
0
 def test_sync_doc_local(self):
     """Check that syncing the local test document stores eight pages.

     The document is ``httpclient402doc/index.html`` under
     ``settings.TESTDATA``, addressed through a ``file://`` URL. Each page
     key is parsed as a URL and its path component must exist on disk.
     """
     pname, dname, release = 'project1', 'manual', '3.0'
     raw_path = os.path.join(
         settings.TESTDATA, 'httpclient402doc', 'index.html')
     normalized_path = os.path.normpath(raw_path)
     create_doc_local(
         pname,
         dname,
         release,
         'doc.syncer.generic_syncer.SingleURLSyncer',
         'file://' + normalized_path)
     sync_doc(pname, dname, release)
     stored_model = load_model(pname, DOC_PATH, dname + release)
     self.assertEqual(8, len(stored_model.pages))
     for key in stored_model.pages:
         local_path = urlparse(key).path
         self.assertTrue(os.path.exists(local_path))
Example #24
0
def toc_download_section(pname, cname, start=None, end=None, force=False):
    """Download table-of-contents sections of a channel, saving after each.

    :param pname: project name used to load and dump the model.
    :param cname: channel name used to load and dump the model.
    :param start: lowest section index to process; ``None`` for no lower
        bound.
    :param end: first section index NOT to process; ``None`` for no upper
        bound.
    :param force: when true, sections already flagged as downloaded are
        downloaded again instead of being skipped.

    A failure on one section is logged with its traceback and the loop
    carries on with the next section.
    """
    toc_model = load_model(pname, STHREAD_PATH, cname)
    section_syncer = import_clazz(toc_model.syncer_clazz)()
    for toc_section in toc_model.toc_sections:
        position = toc_section.index
        out_of_range = ((start is not None and start > position) or
                        (end is not None and end <= position))
        if out_of_range:
            continue
        if toc_section.downloaded and not force:
            continue
        try:
            section_syncer.toc_download_section(toc_model, toc_section)
            # Dump after every section so earlier results are kept even if
            # a later section fails.
            dump_model(toc_model, pname, STHREAD_PATH, cname)

            print('Downloaded section {0}'.format(toc_section.index))
        except Exception:
            logger.exception('Error while downloading toc section')
Example #25
0
def toc_download_section(pname, cname, start=None, end=None, force=False):
    """Fetch the toc sections whose index falls in ``[start, end)``.

    Either bound may be ``None`` to leave that side open. Sections already
    flagged as downloaded are skipped unless ``force`` is true. The model
    is dumped after each successful section download; exceptions raised for
    a section are logged and do not stop the loop.
    """
    stored_model = load_model(pname, STHREAD_PATH, cname)
    downloader = import_clazz(stored_model.syncer_clazz)()
    for current in stored_model.toc_sections:
        section_index = current.index
        if start is not None and start > section_index:
            continue
        if end is not None and end <= section_index:
            continue
        if current.downloaded and not force:
            continue
        try:
            downloader.toc_download_section(stored_model, current)
            dump_model(stored_model, pname, STHREAD_PATH, cname)

            message = 'Downloaded section {0}'.format(current.index)
            print(message)
        except Exception:
            logger.exception('Error while downloading toc section')
Example #26
0
def toc_download_entries(pname, cname, start=None, end=None, force=False):
    """Download entries of a channel model, saving the model after each.

    :param pname: project name used to load and dump the model.
    :param cname: channel name used to load and dump the model.
    :param start: lowest entry index to process; ``None`` for no lower
        bound.
    :param end: first entry index NOT to process; ``None`` for no upper
        bound.
    :param force: when true, entries already flagged as downloaded are
        downloaded again instead of being skipped.

    A failure on one entry is logged with its traceback and the loop
    carries on with the next entry.
    """
    toc_model = load_model(pname, STHREAD_PATH, cname)
    destination = get_channel_path(pname, cname)
    entry_syncer = import_clazz(toc_model.syncer_clazz)()
    for toc_entry in toc_model.entries:
        position = toc_entry.index
        out_of_range = ((start is not None and start > position) or
                        (end is not None and end <= position))
        if out_of_range:
            continue
        if toc_entry.downloaded and not force:
            continue
        try:
            entry_syncer.download_entry(toc_entry, destination)
            # Dump after every entry so earlier results are kept even if a
            # later entry fails.
            dump_model(toc_model, pname, STHREAD_PATH, cname)

            print('Downloaded {0}'.format(toc_entry.url))
        except Exception:
            logger.exception('Error while downloading entry')
Example #27
0
def toc_download_entries(pname, cname, start=None, end=None, force=False):
    """Fetch the entries whose index falls in ``[start, end)``.

    Either bound may be ``None`` to leave that side open. Entries already
    flagged as downloaded are skipped unless ``force`` is true. Each entry
    is downloaded into the path returned by ``get_channel_path`` and the
    model is dumped after each success; exceptions raised for an entry are
    logged and do not stop the loop.
    """
    stored_model = load_model(pname, STHREAD_PATH, cname)
    channel_dir = get_channel_path(pname, cname)
    downloader = import_clazz(stored_model.syncer_clazz)()
    for current in stored_model.entries:
        entry_index = current.index
        if start is not None and start > entry_index:
            continue
        if end is not None and end <= entry_index:
            continue
        if current.downloaded and not force:
            continue
        try:
            downloader.download_entry(current, channel_dir)
            dump_model(stored_model, pname, STHREAD_PATH, cname)

            message = 'Downloaded {0}'.format(current.url)
            print(message)
        except Exception:
            logger.exception('Error while downloading entry')