コード例 #1
0
ファイル: pack_and_document.py プロジェクト: saerhan/voxelent
def pack_n_doc(VOX_VERSION_NUMBER):
    """Pack the nucleo and plexo bundles, then generate docs for a release.

    VOX_VERSION_NUMBER: version identifier; if its string form is empty,
    a message is printed and nothing else happens.
    """
    # An empty string is falsy — clearer than the original len(str(x)) == 0.
    # print() replaces the Python-2 print statement (other snippets in this
    # file already use Python-3 syntax).
    if not str(VOX_VERSION_NUMBER):
        print("No version indicated")
        return
    pack_nucleo.pack(VOX_VERSION_NUMBER)
    pack_plexo.pack(VOX_VERSION_NUMBER)
    document.document(VOX_VERSION_NUMBER)
コード例 #2
0
ファイル: commands.py プロジェクト: Glavin001/Siri-API
 def light(self, q, wildcards):
     """Handle a light-switch voice command.

     wildcards[0] is the action ("on"/"off"); wildcards[1] is the lamp
     number spelled out ("one".."six").
     """
     # Map the spelled-out lamp number to its numeric id; -1 = unknown lamp.
     lamp_ids = {"one": 1, "two": 2, "three": 3,
                 "four": 4, "five": 5, "six": 6}
     lamp_id = lamp_ids.get(wildcards[1], -1)

     html = document(self.connection)
     html.title("Light Switch")
     html.outgoing(q)
     if (wildcards[0] == "on" or wildcards[0] == "off") and lamp_id > -1:
         html.incoming("Okay, let's turn " + wildcards[0] + " lamp " + wildcards[1])
         html.send()
         # Only works in my setup
         html.request("http://zimmer:2525/remote/" + wildcards[0] + "?id=" + str(lamp_id))
     else:
         html.incoming("No such lamp available")
         html.send()
コード例 #3
0
ファイル: crawler_test.py プロジェクト: priestd09/gotap
def test_get_resolved_inverted_index():
    """Check that the crawler resolves word ids / doc ids back to a
    word -> {url, ...} mapping.

    Returns True when the resolved inverted index matches expectations.
    """
    test_crawler = crawler(None, "")

    # test values
    WORD_ID_A = "1"
    WORD_ID_B = "2"
    WORD_ID_C = "3"
    WORD_A = "I"
    WORD_B = "am"
    WORD_C = "Groot"
    FONT_A = 0
    FONT_B = 0
    FONT_C = 0
    DOC_ID_A = "1"
    URL_A = "http://www.test.com"

    # Initialize crawler state needed for the function call
    test_crawler._inverted_lexicon = {WORD_ID_A: WORD_A, WORD_ID_B: WORD_B, WORD_ID_C: WORD_C}
    test_crawler._document_index = {DOC_ID_A: document(URL_A)}
    curr_words = ((WORD_ID_A, FONT_A), (WORD_ID_B, FONT_B), (WORD_ID_C, FONT_C))
    curr_doc_id = DOC_ID_A
    test_crawler.add_words_to_inverted_index(curr_words, curr_doc_id)

    # Expected and actual result comparison
    expected_result = {WORD_A: {URL_A}, WORD_B: {URL_A}, WORD_C: {URL_A}}
    actual_result = test_crawler.get_resolved_inverted_index()

    # cmp() no longer exists in Python 3; direct equality is equivalent here.
    return expected_result == actual_result
コード例 #4
0
    def light(self, q, wildcards):
        """Turn a numbered lamp on or off based on parsed voice wildcards.

        wildcards[0] is the requested action ("on"/"off"); wildcards[1]
        names the lamp ("one".."six").
        """
        # Translate the spelled-out lamp number into a 1-based numeric id;
        # unknown names map to -1.
        names = ("one", "two", "three", "four", "five", "six")
        word = wildcards[1]
        lamp = names.index(word) + 1 if word in names else -1

        html = document(self.connection)
        html.title("Light Switch")
        html.outgoing(q)
        action = wildcards[0]
        if (action == "on" or action == "off") and lamp > -1:
            html.incoming("Okay, let's turn " + action + " lamp " +
                          word)
            html.send()
            # Only works in my setup
            html.request("http://zimmer:2525/remote/" + action + "?id=" +
                         str(lamp))
        else:
            html.incoming("No such lamp available")
            html.send()
コード例 #5
0
ファイル: grapher.py プロジェクト: daivikswarup/opensoft16
    def OnOpen(self,e):
        """Prompt for one or more PDF files and load each as a document.

        The chosen paths replace self.docList; afterwards the tree view is
        refreshed and the Process menu item enabled.
        """
        dlg = wx.FileDialog(self, "Choose files", self.dirname, ".", "*.pdf", wx.FD_MULTIPLE)
        if dlg.ShowModal() == wx.ID_OK:
        #ProgressBar

            # self.totalpages=0
            # self.pagesdone=0
            # self.progress=wx.ProgressDialog('Progress','Starting...')
            #self.toolbar.EnableTool(5,True)
            # Build a fresh document list; each document gets its position
            # in the list as its job number.
            self.docList=[]
            for address in dlg.GetPaths():
                 d = document(self,address,len(self.docList))
                 #d.start()
                 self.docList.append(d)
            self.RefreshTree()
            self.menuProcess.Enable(True)
            # for d in self.docList:
            #     d.join()
            #self.tree_ctrl = wx.TreeCtrl(self, -1, style=wx.TR_DEFAULT_STYLE | \
            #                                    wx.TR_FULL_ROW_HIGHLIGHT )
            #self.root = self.tree_ctrl.AddRoot('Files')
            
            #self.tree_ctrl.Bind(wx.EVT_TREE_SEL_CHANGED, self.OnSelChanged, id=1)

        dlg.Destroy()
コード例 #6
0
 def run_ex(self, itemid, content, call_index=True):
     """Wrap `content` in a document, insert it under key `itemid`, and
     optionally rebuild the LSH index.  Failures are silently ignored
     (best-effort insert).
     """
     try:
         doc = document(content)
         self.insert(doc, key=itemid)
         if call_index:
             self.lsh.index()
     except Exception:
         # A bare `except:` also swallowed SystemExit/KeyboardInterrupt;
         # Exception keeps the best-effort behavior without that.
         pass
コード例 #7
0
def document_list_view(request, page = '1'):
    """Render a paginated list of documents.

    page: 1-based page number as a string (URL captures are strings).
    """
    title = "文档列表"
    page = int(page)
    num = 20  # documents per page
    document_instance = documentModel()
    doccuments = document_instance.getDocumentList(page, num)
    docs = [document(d) for d in doccuments]
    doc_total = document_instance.getDocumentNum()
    # Ceiling division; the original's `/` yields a float on Python 3,
    # breaking the comparisons and range() calls below.
    page_total = (doc_total - 1) // num + 1

    previous_page = page if page == 1 else page - 1
    # BUG FIX: the next page is page + 1 (clamped at the last page); the
    # original jumped to page_total + 1, past the end.
    next_page = page if page == page_total else page + 1

    # Build the page-number strip (sliding windows of 5 pages).
    pages = []
    temp = page - page % 5
    if page_total <= 5:
        for i in range(page_total):
            pages.append(i + 1)
    elif page > page_total - page_total % 5:
        for i in range(page_total - page_total % 5, page_total):
            pages.append(i + 1)
    else:
        for i in range(5):
            pages.append(i + 1 + temp)

    return render_to_response("document.html",
                              {
                                  "title": title,
                                  'docs': docs,
                                  'project_name': webConfig.PROJECTNAME,
                                  'toplabel0': webConfig.TOPLABEL0,
                                  'toplabel1': webConfig.TOPLABEL1,
                                  'toplabel2': webConfig.TOPLABEL2,
                                  'toplabel3': webConfig.TOPLABEL3,
                                  'toplabel4': webConfig.TOPLABEL4,
                                  'toplabel5': webConfig.TOPLABEL5,
                                  'toplabel6': webConfig.TOPLABEL6,
                                  'page_total': page_total,
                                  'page': page,
                                  'previous_page': previous_page,
                                  'next_page': next_page,
                                  'pages': pages,
                              }
                              )
コード例 #8
0
 def query(self, doc, topn=1000):
     """Return up to `topn` LSH matches for text `doc`; [] on any failure."""
     try:
         # BUG FIX: normalize() returns the normalized string — the
         # original discarded the result, so queries ran on raw text.
         doc = unicodedata.normalize('NFKC', doc)
         doc = document(doc)
         minhash = doc.get_minhash(doc.k_shingles,
                                   config.MINHASH_CONFIG['num_permutation'])
         return self.lsh.query(minhash, topn)
     except Exception:
         # Narrowed from a bare `except:` which also caught SystemExit
         # and KeyboardInterrupt.
         return []
コード例 #9
0
def load_json_dataset(dataset_file):
    """Load a JSON dataset file into a list of document objects.

    Each JSON item must carry 'html_text', 'propaganda_label', 'gdlt_id'
    and 'mbfc_url' keys.
    """
    logging.info('loading dataset: ' + dataset_file + ' ...')
    # `with` closes the handle; json.load(open(...)) leaked it.
    with open(dataset_file) as f:
        ds = json.load(f)
    # The enumerate index in the original was never used.
    dataset = [document(item['html_text'], item['propaganda_label'],
                        item['gdlt_id'], item['mbfc_url'])
               for item in ds]
    logging.info('dataset loaded !')
    return dataset
コード例 #10
0
ファイル: main.py プロジェクト: daivikswarup/opensoft16
def __main__():
	"""Process all the PDFs in the docList."""
	# BUG FIX: the original referenced `docList` without ever defining it
	# (the lines that built it were commented out), so the loop raised
	# NameError.  Rebuild the list from the single processed document.
	e = document()
	e.process("a.pdf", 0)
	docList = [e]

	# NOTE(review): d.process() is called here with no arguments while
	# e.process above took (path, job) — confirm against the document
	# class which signature is current.
	for d in docList:
		d.process()
コード例 #11
0
ファイル: commands.py プロジェクト: Hackworth/Siri-API
 def no_action (self, q, wildcards):
     """Forward an unrecognized command to the local SPVoice service and
     relay its reply back to the client as an HTML conversation."""
     spvoice_url = 'http://localhost:9000/command'
     payload = urllib.parse.urlencode({'command': q}).encode('utf8')
     raw = urllib.request.urlopen(spvoice_url, payload).read()
     response = json.loads(raw.decode("utf-8"))
     html = document(self.connection)
     html.title("House")
     html.incoming(q)
     # The service escapes newlines; render them as HTML line breaks.
     html.outgoing(response['response'].replace("\\n","<br />\n"))
     html.send()
コード例 #12
0
def read_documents(author_dir):
	"""Build an `author` object from a directory of .txt documents.

	Every .txt file becomes a `document`; the author's doc_list and
	doc_count are filled in before the author object is returned.
	"""
	document_list = []
	auth = author(os.path.basename(author_dir))
	for doc_name in os.listdir(author_dir):
		if doc_name.endswith('.txt'):
			# `with` closes the handle; the original open(...).read() leaked it.
			with open(author_dir + "/" + doc_name, 'r') as f:
				doc_text = f.read()
			# Document id is author name + file name without the .txt suffix.
			document_list.append(document(auth.name + doc_name[:-4], auth.name, doc_text))
	auth.doc_list = document_list
	auth.doc_count = len(document_list)
	return auth
コード例 #13
0
ファイル: crawler_test.py プロジェクト: priestd09/gotap
def test_sorted_resolved_inverted_index():
    """Check page-rank ordering of the sorted resolved inverted index.

    Returns True when the three top documents come back in the expected
    D, B, A order (compared by URL).
    """
    test_crawler = crawler(None, "")

    # test values
    WORD_A = "test"
    DOC_ID_A = 1
    DOC_ID_B = 2
    DOC_ID_C = 3
    DOC_ID_D = 4
    DOC_A = "http://www.A.com"
    DOC_B = "http://www.B.com"
    DOC_C = "http://www.C.com"
    DOC_D = "http://www.D.com"

    # Initialize crawler state needed for the function call
    word_id = test_crawler.word_id(WORD_A)
    doc_id_list = [DOC_ID_A, DOC_ID_B, DOC_ID_D]
    test_crawler.document_id(DOC_A)
    test_crawler.document_id(DOC_B)
    test_crawler.document_id(DOC_C)
    test_crawler.document_id(DOC_D)
    test_crawler._inverted_index[word_id] = doc_id_list
    test_crawler.add_link(DOC_ID_A, DOC_ID_B)
    test_crawler.add_link(DOC_ID_B, DOC_ID_D)
    test_crawler.add_link(DOC_ID_D, DOC_ID_C)
    test_crawler.compute_page_rank()
    test_crawler.construct_sorted_resolved_inverted_index()

    # Expected and actual result comparison
    expected_result = [document(DOC_D), document(DOC_B), document(DOC_A)]
    actual_result = test_crawler._sorted_resolved_inverted_index[WORD_A]

    # cmp() was removed in Python 3; compare the URLs directly.
    return all(
        expected_result[i].get_doc_url() == actual_result[i].get_doc_url()
        for i in range(3)
    )
コード例 #14
0
def load_dataset(dataset_file, classification="binary"):
    """Load a tab-separated dataset into a list of document objects.

    classification: 'binary' collapses label '3' to '1' and everything
    else to '-1'; any other value keeps the raw first-column label.
    """
    logging.info('loading dataset: ' + dataset_file + ' ...')
    dataset = []
    with codecs.open(dataset_file, 'r') as f:
        # enumerate replaces the hand-rolled counter; the `with` block
        # already closes the file, so the original's f.close() inside it
        # was redundant.
        for i, line in enumerate(f):
            fields = line.split('\t')
            prop_gold = '1' if fields[0] == '3' else '-1'
            if classification == 'binary':
                article = document(fields[1], prop_gold, str(i), '')
            else:
                article = document(fields[1], fields[0], str(i), '')
            dataset.append(article)
    logging.info('dataset loaded !')
    return dataset
コード例 #15
0
ファイル: corpus.py プロジェクト: whiteh/sentiment
  def run(self, line):
    """Parse one comma-separated line into a labelled document.

    The label comes either from the fixed label_const or, when that is
    None, from label_dict keyed by the cleaned label column.
    """
    parts = line.split(",")
    doc = document()
    # The original called setText twice — first with the raw column, then
    # with the cleaned one; only the cleaned text survives, so set it once.
    doc.setText(self.clean(parts[self.text_col]))

    if self.label_const is None:
      doc.setLabel(self.label_dict[self.clean(parts[self.label_col])])
    else:
      doc.setLabel(self.label_const)
    return doc
コード例 #16
0
 def run(self, docs):
     """Insert every document in `docs` (itemid -> content) into the LSH
     index, printing a same-line progress counter.

     Items that fail to build or insert are skipped (best effort); the
     index is built once at the end.
     """
     count = 1
     for itemid, content in docs.items():
         try:
             doc = document(content)
             self.insert(doc, key=itemid)
             # end='' keeps the \r progress line in place; the original's
             # trailing comma was a Python-2 idiom that in Python 3 just
             # built a throwaway tuple after printing a newline.
             print('\rpushed %d items' % count, end='')
             sys.stdout.flush()
             count += 1
         except Exception:
             # Narrowed from a bare `except:`.
             pass
     self.lsh.index()
     print('')
コード例 #17
0
ファイル: crawler_test.py プロジェクト: priestd09/gotap
def test_document_index():
    """Check that document_id() registers documents in _document_index.

    Returns True when both registered documents resolve to their URLs.
    """
    test_crawler = crawler(None, "")

    # test values
    URL_A = "http://www.testA.com"
    DOC_ID_A = 1
    URL_B = "http://www.testB.com"
    DOC_ID_B = 2

    # Initialize crawler state needed for the function call
    test_crawler.document_id(URL_A)
    test_crawler.document_id(URL_B)

    # Expected and actual result comparison
    expected_result = {DOC_ID_A: document(URL_A), DOC_ID_B: document(URL_B)}
    actual_result = test_crawler._document_index

    # cmp() was removed in Python 3; compare URLs with ==.
    bool_A = expected_result[DOC_ID_A].get_doc_url() == actual_result[DOC_ID_A].get_doc_url()
    bool_B = expected_result[DOC_ID_B].get_doc_url() == actual_result[DOC_ID_B].get_doc_url()
    return bool_A and bool_B
コード例 #18
0
ファイル: corpus.py プロジェクト: wilixx/arXiv-Gene
    def add_document(self, file_path):
        """Read `file_path` and register it as a new document in the corpus.

        Files that fail to decode are logged to self.log_file and skipped.
        """
        try:
            # `with` guarantees the handle is closed even when read()
            # raises (the original leaked it on UnicodeDecodeError).
            with open(join(file_path), 'r') as f:
                raw_data = f.read()

            # create the document object, keyed by its generated ID
            d = document(self, raw_data, file_path.split("/")[-1])
            self.documents[d.ID] = d
        except UnicodeDecodeError:
            self.log_file.write("UnicodeDecodeError on file " + file_path +
                                "\n")
コード例 #19
0
ファイル: top.py プロジェクト: daivikswarup/opensoft16
	def do(self,addresses):
		"""Process each PDF address into a document and return the list.

		Each document is processed with its position in `addresses` as
		the job number.
		"""
		docList = []
		# enumerate replaces the hand-maintained counter of the original.
		for count, address in enumerate(addresses):
			e = document()
			e.process(address, count)
			docList.append(e)
		return docList
コード例 #20
0
ファイル: corpus.py プロジェクト: hamsal/arXiv-Gene
    def add_document(self,file_path):
        """Read `file_path` and register it as a new document in the corpus.

        Files that fail to decode are logged to self.log_file and skipped.
        """
        try:
            # `with` closes the handle even when read() raises; the
            # original leaked it on UnicodeDecodeError.
            with open(join(file_path), 'r') as f:
                raw_data = f.read()

            # create the document object, keyed by its generated ID
            d = document(self, raw_data, file_path.split("/")[-1])
            self.documents[d.ID] = d
        except UnicodeDecodeError:
            self.log_file.write("UnicodeDecodeError on file " + file_path +
                                "\n")
コード例 #21
0
ファイル: crawler.py プロジェクト: priestd09/gotap
 def document_id(self, url):
     """Return the document id for `url`, creating one on first sight."""
     # EAFP: a cache hit is the common case.
     try:
         return self._doc_id_cache[url]
     except KeyError:
         pass

     # TODO: just like word id cache, but for documents. if the document
     #       doesn't exist in the db then only insert the url and leave
     #       the rest to their defaults.
     doc_id = self._mock_insert_document(url)
     self._doc_id_cache[url] = doc_id

     # Register the freshly created document object in the document index.
     self._document_index[doc_id] = document(url)
     return doc_id
コード例 #22
0
def load_myds(dataset_file):
    """Load a tab-separated dataset file into a list of document objects.

    Column mapping: fields[0] = html_text, fields[-1] = prop_label,
    fields[4] = gdelt_id, fields[-2] = gdelt_sourceURL.
    """
    logging.info('loading dataset: ' + dataset_file + ' ...')
    dataset = []
    with codecs.open(dataset_file, 'r', encoding='utf8') as f:
        # The original incremented a counter `i` that was never used, and
        # called f.close() redundantly inside the `with` block.
        for line in f:
            fields = line.strip().split('\t')
            article = document(fields[0], fields[-1], fields[4],
                               fields[-2])
            dataset.append(article)
    logging.info('dataset loaded !')
    return dataset
コード例 #23
0
ファイル: __main__.py プロジェクト: daivikswarup/opensoft16
	def do(self,addresses):
		"""Process each PDF address into a document and return the list.

		document(parent, pdf_path, job_number) per the original comment.
		"""
		docList = []
		# BUG FIX: the original maintained `count` but always passed job
		# number 0 to document(); pass the running index instead (the
		# top.py variant of this method passes its counter the same way).
		for count, address in enumerate(addresses):
			e = document(self, address, count)
			e.process()
			docList.append(e)
		return docList
コード例 #24
0
ファイル: canvas.py プロジェクト: asuar078/python_workspace
 def wrappedindocument(self, file=None, **kwargs):
     """Split **kwargs into page_* / write_* groups, wrap this canvas in a
     single-page document and pass it on to `method` for writing.

     NOTE(review): `method` is a free variable from an enclosing decorator
     scope not visible in this chunk — confirm against the full file.
     """
     page_kwargs = {}
     write_kwargs = {}
     for name, value in kwargs.items():
         if name.startswith("page_"):
             page_kwargs[name[5:]] = value  # strip the "page_" prefix
         elif name.startswith("write_"):
             write_kwargs[name[6:]] = value  # strip the "write_" prefix
         else:
             # Unprefixed keywords are deprecated; treated as page_* args.
             warnings.warn("Keyword argument %s of %s method should be prefixed with 'page_'" %
                             (name, method.__name__), DeprecationWarning)
             page_kwargs[name] = value
     d = document.document([document.page(self, **page_kwargs)])
     # Masquerade as the wrapped method for introspection.
     self.__name__ = method.__name__
     self.__doc__ = method.__doc__
     return method(d, file, **write_kwargs)
コード例 #25
0
ファイル: crawler_single.py プロジェクト: anasfaris/gotap
 def document_id(self, url):
     """Return the document id for `url`, creating it on first sight."""
     if url not in self._doc_id_cache:
         # First time this url is seen: insert it and remember the mapping.
         # TODO: just like word id cache, but for documents. if the document
         #       doesn't exist in the db then only insert the url and leave
         #       the rest to their defaults.
         new_id = self._mock_insert_document(url)
         self._doc_id_cache[url] = new_id
         # Also register the new document object in the document index.
         self._document_index[new_id] = document(url)
     return self._doc_id_cache[url]
コード例 #26
0
ファイル: grPanel.py プロジェクト: daivikswarup/opensoft16
    def __init__(self):
        """Build the frame: one document holding a single page, shown in a
        DocNoteBook (three tabs sharing the same document object)."""
        wx.Frame.__init__(self, None, wx.ID_ANY,
                          "Notebook Tutorial",
                          size=(600,400)
                          )
        panel = wx.Panel(self)
        self.obj=document()
        self.obj.pageList=[page(1,1)]
        # The same document instance is reused for all three notebook tabs.
        lis=[self.obj, self.obj, self.obj]
        notebook = DocNoteBook(panel,lis)
        sizer = wx.BoxSizer(wx.VERTICAL)
        sizer.Add(notebook, 1, wx.ALL|wx.EXPAND, 5)
        panel.SetSizer(sizer)
        self.Layout()
 
        self.Show()
コード例 #27
0
 def wrappedindocument(self, file=None, **kwargs):
     """Split **kwargs into page_* / write_* groups, wrap this canvas in a
     single-page document and pass it on to `method` for writing.

     NOTE(review): `method` is a free variable from an enclosing decorator
     scope not visible in this chunk — confirm against the full file.
     """
     page_kwargs = {}
     write_kwargs = {}
     for name, value in kwargs.items():
         if name.startswith("page_"):
             page_kwargs[name[5:]] = value  # strip the "page_" prefix
         elif name.startswith("write_"):
             write_kwargs[name[6:]] = value  # strip the "write_" prefix
         else:
             # Unprefixed keywords are deprecated; treated as page_* args.
             warnings.warn(
                 "Keyword argument %s of %s method should be prefixed with 'page_'"
                 % (name, method.__name__), DeprecationWarning)
             page_kwargs[name] = value
     d = document.document([document.page(self, **page_kwargs)])
     # Masquerade as the wrapped method for introspection.
     self.__name__ = method.__name__
     self.__doc__ = method.__doc__
     return method(d, file, **write_kwargs)
コード例 #28
0
ファイル: device.py プロジェクト: notti/scan-daemon
 def scan(self, params):
     """Run scanadf with the given options and feed scanned pages into a
     document.

     `params` is consumed destructively: 'document-finish' and
     'document-type' are popped out; the remaining entries become scanadf
     command-line options.  When finish is set and no error occurred, the
     current document is finalized; otherwise the idle timer is reset.

     NOTE(review): Python 2 code (print >> syntax, dict.iteritems).
     """
     finish = params['document-finish']
     document_type = params['document-type']
     del params['document-type']
     del params['document-finish']
     params['output-file'] = self.name+'-scan-'+str(self.id)+'-%04d'
     params['d'] = self.get_sane_name()
     command = ['/usr/bin/scanadf']
     # Single-letter options get one dash, long options two.
     for option, value in params.iteritems():
         if len(option) == 1:
             command.append('-'+option)
         else:
             command.append('--'+option)
         if len(value):
             command.append(value)
     # A4 page geometry in millimetres.
     command += ['--pagewidth', '210', '--pageheight', '297', '-x', '210', '-y', '297']
     print >> sys.stderr, command
     self.unclaim()
     self.scanadf = subprocess.Popen(command, cwd='/dev/shm', stderr=subprocess.PIPE, bufsize=1)
     if self.doc == None:
         self.doc = document.document(self.config, format=document_type)
     error = False
     # Parse scanadf's stderr line by line for progress and errors.
     while True:
         line = self.scanadf.stderr.readline()
         print >> sys.stderr, line
         if not line:
             break
         msg = line.strip().split(' ')
         if msg[0] == 'scanadf:':
             # "rounded" messages are harmless; anything else is fatal.
             if msg[1][0:7] == 'rounded':
                 continue
             error = True
             break
         if len(msg) == 3:
             if msg[1] == 'document':
                 # Scanned image written to /dev/shm — hand it to the doc.
                 self.doc.process_image('/dev/shm/'+msg[2])
     if (not error) and finish:
         self.doc.finish()
         self.doc = None
     else:
         self.timer = 0
     self.scanadf.wait()
     self.scanadf = None
     self.claim()
     self.id = self.id + 1
コード例 #29
0
def determine_author(text, author_list, vocabulary, tot_doc_count, use_extrafuture):
	"""Return the name of the most likely author of `text`.

	Scores each author with a naive-Bayes log-probability (Laplace-smoothed
	by `alpha`), optionally adjusted by stylometric distances when
	`use_extrafuture` is true, and returns the best-scoring author name.

	NOTE(review): relies on module-level constants (alpha, word_coef,
	sentence_coef, quatation_coef, exclamation_coef) not visible in this
	chunk — confirm they are defined at module scope.
	"""
	auth_res = {}
	doc = document("","",text)
	doc.count_sentence()
	doc.compute_ave_words_in_sentence()
	doc.count_quatation_mark()
	doc.count_exclamation_mark()
	bow = doc.construct_bow()
	for auth in author_list:
		# Log-prior of the author plus log-likelihood of each token.
		auth_res[auth.name] = log(1.0 * auth.doc_count / tot_doc_count)
		for token, count in bow.items():
			token_pos = log((auth.vocabulary.get(token,0) + alpha) / (auth.tot_token_count + alpha * len(vocabulary)))
			auth_res[auth.name] += token_pos * count
		if use_extrafuture :
			# Stylometric penalties: distance between the author's averages
			# and this document's statistics, each with its own weight.
			auth_res[auth.name] += abs(auth.ave_words_in_sentence - doc.ave_words_in_sentence) * word_coef
			auth_res[auth.name] += abs(auth.ave_sentence_count - doc.sentence_count) * sentence_coef
			auth_res[auth.name] += abs(auth.ave_quatation_mark - doc.quatation_mark_count) * quatation_coef
			auth_res[auth.name] += abs(auth.ave_exclamation_mark - doc.exclamation_mark_count) * exclamation_coef

	return sorted(auth_res, key=auth_res.get,reverse=True)[0]
コード例 #30
0
ファイル: main.py プロジェクト: Junlings/publicationsupport
    def __init__(self,parent,settings):
        """Set up the main frame: a document tree on the left and an AUI
        notebook on the right, plus an empty document and working dir."""
        # based on a frame, so set up the frame
        GenericFrameSimple.__init__(self,parent,wx.ID_ANY, settings)
        
        
        self.mainpanel = wx.Panel(self, -1,style=wx.EXPAND)
        
        self.doctree = MyDictTree(self.mainpanel,'Document')

        
        self.ModelNoteBook = wx.aui.AuiNotebook(self.mainpanel,1,size=(500,500),style=wx.aui.AUI_NB_DEFAULT_STYLE)

        # Tree fixed-width on the left, notebook takes the remaining space.
        self.sizer=wx.BoxSizer(wx.HORIZONTAL)
        self.sizer.Add(self.doctree,0,wx.EXPAND)
        self.sizer.Add(self.ModelNoteBook,1,wx.EXPAND)
        self.mainpanel.SetSizer(self.sizer)
        
        self.doc = document()
        
        self.dirname = ''
コード例 #31
0
ファイル: crawler_mthread.py プロジェクト: priestd09/gotap
    def document_id(self, url):
        """Get the document id for some url (thread-safe via doc_id_lock)."""
        # Acquire doc_id_lock before enter critical section
        crawler.doc_id_lock.acquire()
        if url in self._doc_id_cache:
            # Release doc_id_lock before return
            crawler.doc_id_lock.release()
            return self._doc_id_cache[url]

        # TODO: just like word id cache, but for documents. if the document
        #       doesn't exist in the db then only insert the url and leave
        #       the rest to their defaults.

        doc_id = self._mock_insert_document(url)
        self._doc_id_cache[url] = doc_id
        # Release doc_id_lock
        crawler.doc_id_lock.release()
        # add the newly created document object to the document index
        # NOTE(review): this write happens AFTER the lock is released —
        # confirm _document_index is safe to mutate without doc_id_lock.
        self._document_index[doc_id] = document(url)
        #self._document_index[doc_id] = url
        return doc_id
コード例 #32
0
ファイル: crawler_mthread.py プロジェクト: anasfaris/gotap
 def document_id(self, url):
     """Get the document id for some url (thread-safe via doc_id_lock)."""
     # Acquire doc_id_lock before enter critical section
     crawler.doc_id_lock.acquire()
     if url in self._doc_id_cache:
         # Release doc_id_lock before return
         crawler.doc_id_lock.release()
         return self._doc_id_cache[url]
     
     # TODO: just like word id cache, but for documents. if the document
     #       doesn't exist in the db then only insert the url and leave
     #       the rest to their defaults.
     
     doc_id = self._mock_insert_document(url)
     self._doc_id_cache[url] = doc_id
     # Release doc_id_lock
     crawler.doc_id_lock.release()
     # add the newly created document object to the document index
     # NOTE(review): this write happens AFTER the lock is released —
     # confirm _document_index is safe to mutate without doc_id_lock.
     self._document_index[doc_id] = document(url)
     #self._document_index[doc_id] = url
     return doc_id
コード例 #33
0
def run_example():
    """End-to-end Huddle API example (Python 2: print statement, raw_input).

    Walks the OAuth authorization-code flow, locates a folder's
    create-document link, creates a file there and uploads local content
    to it.
    """
    #first lets get the auth code from the client
    request_token_url = huddleAuthServer + "request?response_type=code" + "&client_id=" + consumer_key + "&redirect_uri=" + redirect_uri

    print "Get Your Authorization Code and paste it back into python\n" + request_token_url
    code = raw_input('--> ')

    auth = oAuth.oAuth(huddleAuthServer, code, consumer_key, redirect_uri)

    #store our access token
    tokenStore = auth.handleAccessToken()

    #now we can make calls to the api
    #we only have the uri for what folder we want to create the file in so first of all lets find the upload uri of that
    api = huddleApi.huddleApi(huddleApiServer, tokenStore)
    getFolder = folder.folder(api.getFolder("http://api.huddle.dev/files/folders/1237980/"))
    print getFolder.getLinksWithRel("create-document")
    getDocument = document.document(api.createFile("foo", "bar", getFolder.getLinksWithRel("create-document")))

    #time to upload the contents
    api.uploadToFile("C:\\Users\\adam.flax\\Documents\\foo.txt", getDocument.getLinkWithRel("upload"))
    os.system("pause")
コード例 #34
0
ファイル: commands.py プロジェクト: Glavin001/Siri-API
 def hans(self, q, wildcards):
     """Reply to the trigger phrase with a fixed greeting for Robin."""
     page = document(self.connection)
     page.title("Welcome")
     page.outgoing(q)
     page.incoming("Hi Robin!")
     page.send()
コード例 #35
0
ファイル: commands.py プロジェクト: Glavin001/Siri-API
 def no_action (self, q, wildcards):
     """Fallback handler: tell the user the command was not understood."""
     page = document(self.connection)
     page.title("Error")
     page.outgoing(q)
     page.incoming("Sorry, I don't know how to do that")
     page.send()
コード例 #36
0
ファイル: parser.py プロジェクト: iamweiweishi/ERNIE
	def parse(self,xml_string,input_file_name,curs):
		"""Parse a DBLP XML record string and upsert its contents.

		For every record (REC) under the root, extracts publication
		metadata, document identifiers, authors and citations, and upserts
		them into dblp_publications, dblp_document_identifiers,
		dblp_authors and dblp_references through the cursor `curs`.

		NOTE(review): input_file_name is accepted but never used in this
		method — confirm whether it is needed.
		"""
		# DTD-validating, recovering parser; DBLP dumps are ISO-8859-1.
		parser_dtd=etree.XMLParser(encoding='ISO-8859-1',dtd_validation=True,load_dtd=True,remove_comments=True,recover=True)
		root = etree.fromstring(xml_string.encode('ISO-8859-1'),parser_dtd)
		#print(type(root)) 
		for REC in root:
			
			#Parse publication and create a publication object containing all the attributes of publication
			new_pub=pub.publication()

			author_names=[]
		
			new_pub.source_type=REC.tag
			
			new_pub.source_id=REC.attrib.get('key')
			
			if 'mdate' in REC.attrib:
				new_pub.modified_date=REC.attrib.get('mdate')

			if 'publtype' in REC.attrib:
				new_pub.document_type=REC.attrib.get('publtype')

			#Can be more than 1 author; each entry is (name, orcid-or-None)
			author_fields=REC.findall('author')
			if author_fields is not None:
				for auth in author_fields:
						if 'orcid' in auth.attrib:
							author_names.append((auth.text,auth.attrib.get('orcid')))
						else:
							author_names.append((auth.text,None))
				
			# Page ranges come as "begin-end" or a single page.
			pages=REC.find('pages')
			if pages is not None:
				if len(pages.text.split('-')) == 2:
					new_pub.begin_page=pages.text.split('-')[0]
					new_pub.end_page=pages.text.split('-')[1]
				else:
					new_pub.begin_page=pages.text.split('-')[0]

			title=REC.find('title')
			if title is not None:
				new_pub.document_title=title.text

			issue_no=REC.find('number')
			if issue_no is not None:
				new_pub.issue=issue_no.text

			year=REC.find('year')
			if year is not None:
				new_pub.publication_year=year.text

			address=REC.find('address')
			if address is not None:
				new_pub.publisher_address=address.text

			publisher=REC.find('publisher')
			if publisher is not None:
				new_pub.publisher_name=publisher.text

			vol=REC.find('volume')
			if vol is not None:
				new_pub.volume=vol.text

			# Source title: prefer <journal>, fall back to <booktitle>.
			s_title=REC.find('journal')
			if s_title is not None:
				new_pub.source_title=s_title.text
			else:
				s_title=REC.find('booktitle')
				if s_title is not None:
					new_pub.source_title=s_title.text

			#Query to insert publication record into the publications table in the database
			curs.execute("INSERT INTO dblp_publications (begin_page,modified_date,document_title,document_type,end_page,issue,"\
				"publication_year,publisher_address,publisher_name,source_id,source_title,source_type,volume)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"\
				" ON CONFLICT (source_id) DO UPDATE SET begin_page=excluded.begin_page,modified_date=excluded.modified_date,document_title=excluded.document_title,"\
				"document_type=excluded.document_type,end_page=excluded.end_page,issue=excluded.issue,publication_year=excluded.publication_year,"\
				"publisher_address=excluded.publisher_address,publisher_name=excluded.publisher_name,source_id=excluded.source_id,source_title=excluded.source_title,"\
				"source_type=excluded.source_type,volume=excluded.volume,last_updated_time=current_timestamp;",
				(str(new_pub.begin_page),new_pub.modified_date,str(new_pub.document_title),str(new_pub.document_type),str(new_pub.end_page),str(new_pub.issue),
					str(new_pub.publication_year),str(new_pub.publisher_address),str(new_pub.publisher_name),str(new_pub.source_id),str(new_pub.source_title),str(new_pub.source_type),
					str(new_pub.volume)))


			#parse document identifier fields for each publication
			new_doc=doc.document()

			#A dictionary which stores all the document id's and types
			docs=dict()


			new_doc.source_id=new_pub.source_id
			ee=REC.findall('ee')
			if ee is not None:
				for i in ee:
					docs[i.text]=i.tag
				
			url=REC.findall('url')
			if url is not None:
				for i in url:
					docs[i.text]=i.tag
			
			crossref=REC.findall('crossref')
			if crossref is not None:
				for i in crossref:
					docs[i.text]=i.tag

			isbn=REC.find('isbn')
			if isbn is not None:
				docs[isbn.text]=isbn.tag
				
			series=REC.find('series')
			if series is not None:
				docs[series.text]=series.tag
				
			cdrom=REC.find('cdrom')
			if cdrom is not None:
				docs[cdrom.text]=cdrom.tag

			school=REC.find('school')
			if school is not None:
				docs[school.text]=school.tag

			notes=REC.find('notes')
			if notes is not None:
				docs[notes.text]=notes.tag

			#Inserting records into dblp_document_identifiers
			for text,tag in docs.items():
				new_doc.document_id=text
				new_doc.document_id_type=tag

				curs.execute("INSERT INTO dblp_document_identifiers(source_id,document_id,document_id_type) VALUES(%s,%s,%s)"\
					"ON CONFLICT (source_id,document_id,document_id_type) DO UPDATE SET source_id=excluded.source_id,"\
					"document_id=excluded.document_id,document_id_type=excluded.document_id_type,last_updated_time=current_timestamp;",
					(str(new_doc.source_id),str(new_doc.document_id),str(new_doc.document_id_type)))

			#parse author fields for dblp_authors
			new_auth=author.author()

			editor=REC.find('editor')
			if editor is not None:
				new_auth.editor_name=editor.text

			# Authors keep their position in the record via seq_no.
			seq_no=0	

			for name in author_names:
				# Last whitespace-separated token is treated as the last name.
				new_auth.first_name=' '.join(name[0].split()[:-1])
				new_auth.last_name=name[0].split()[-1]
				new_auth.full_name=name[0]
				new_auth.source_id=new_pub.source_id
				new_auth.seq_no=seq_no
				if name[1] is not None:
					new_auth.orc_id=name[1]


				curs.execute("INSERT INTO dblp_authors(source_id,full_name,last_name,first_name,seq_no,orc_id,editor_name)"\
					"VALUES(%s,%s,%s,%s,%s,%s,%s) ON CONFLICT (source_id,seq_no) DO UPDATE SET source_id=excluded.source_id,"\
					"full_name=excluded.full_name,last_name=excluded.last_name,first_name=excluded.first_name,seq_no=excluded.seq_no,"\
					"orc_id=excluded.orc_id,editor_name=excluded.editor_name,last_updated_time=current_timestamp;",(str(new_auth.source_id),str(new_auth.full_name),str(new_auth.last_name),
						str(new_auth.first_name),str(new_auth.seq_no),str(new_auth.orc_id),str(new_auth.editor_name)))

				seq_no+=1

			#parser citataion fields for dblp_references

			new_ref=reference.reference()
			new_ref.source_id=new_pub.source_id

			citations=REC.findall('cite')
			if citations is not None:
				for cite in citations:
					# NOTE(review): this compares the element, not cite.text,
					# against '...' — confirm whether cite.text was intended.
					if cite != '...':
						new_ref.cited_source_id=cite.text

						curs.execute("INSERT INTO dblp_references(source_id,cited_source_id) VALUES(%s,%s) ON CONFLICT ON CONSTRAINT"\
							" dblp_references_pk DO UPDATE SET source_id=excluded.source_id,cited_source_id=excluded.cited_source_id,"\
							"last_updated_time=current_timestamp;",(str(new_ref.source_id),str(new_ref.cited_source_id)))
コード例 #37
0
ファイル: classifier.py プロジェクト: whiteh/sentiment
def preprocess(sent):
    """Turn a raw sentence into its first feature vector.

    Wraps *sent* in a ``document``, runs the shared ``process`` pipeline
    over it, and returns element 0 of the resulting vector list.
    """
    wrapped = document()
    wrapped.setText(sent)
    process.processDoc(wrapped)
    vectors = wrapped.toVector()
    return vectors[0]
コード例 #38
0
ファイル: parser.py プロジェクト: Drin/RestaurantReview
    def parse_review_text(self, review_file, review_text):
        """Parse one review file's raw text into a list of ``document`` objects.

        Handles files that contain more than one review: each time a new
        'Reviewer:' label line is seen, the document accumulated so far is
        flushed to the result list and the accumulators are reset.

        :param review_file: name of the source file (used to build filenames)
        :param review_text: full raw text of the file
        :return: list of parsed ``document`` instances
        """
        review_ndx = 1
        # copy to a local because the text gets rewritten below
        text = review_text

        # accommodates files that contain more than one review
        parsed_docs = []

        paragraph_ndx = 0
        (reviewer, review_ratings, review_paragraphs) = self.clear_doc_params()

        # normalize the <br /> quirks of specific reviewers' files
        if (re.search('|'.join(ALDRIN_TELL_TALES), text, re.I)):
            text = re.sub('<br />', ' ', text)
        elif (re.search('|'.join(JOSEPH_TELL_TALES), text, re.I)):
            text = re.sub('(?:[^>])<br />(?:[^<])', ' ', text)

        review_sections = re.findall(('(?:<p.*?>)?(.*?)' + EOL_REGEX), text, re.S)

        is_review_section = 0
        for section in review_sections:
            section = re.sub('<.*?>', '', section)
            if (re.sub('\s+', '', section) == ''):
                continue

            if ('DEBUG' in os.environ):
                print("%s:\n'%s'\n\n" % (review_file, section))

            if (re.match('\w+\s*:\s*\w+', section)):
                # more than one colon -> not a clean "label: value" line
                if (section.count(':') > 1):
                    reviewer = 'X'
                    continue

                (uppercase_label, value_with_stuff) = section.split(':')
                label = uppercase_label.lower()
                value = value_with_stuff.strip()

                if (label in review_ratings and
                    review_ratings[label] is None):
                    review_ratings[label] = value

                elif (label == 'overall'):
                    review_ratings['rating'] = value

                elif (re.match('reviewer', label, re.I)):
                    # a new reviewer line means the previous review is done
                    if (reviewer is not None):
                        parsed_docs.append(document(author=reviewer,
                                                    ratings=deepcopy(review_ratings),
                                                    paragraphs=deepcopy(review_paragraphs),
                                                    filename="%s-%s" %
                                                    (review_file, review_ndx)))
                        paragraph_ndx = 0
                        is_review_section = 0
                        review_ndx += 1
                        (reviewer, review_ratings, review_paragraphs) = self.clear_doc_params()

                    # we are still on a reviewer text line so this has to be
                    # done after everything has been reset from the
                    # previously observed review
                    reviewer = value

            elif (re.match('written review', section, re.I)):
                if (re.match('written review:(?:\w+\s*)+', section, re.I)):
                    # BUG FIX: re.sub's 4th positional argument is `count`,
                    # not `flags` -- the old call passed re.I (== 2) as a
                    # replacement count and matched case-sensitively; the
                    # stripped `odd_section` result was then discarded and
                    # the raw section stored instead.
                    odd_section = re.sub('WRITTEN REVIEW:', '', section, flags=re.I)
                    review_paragraphs[paragraph_ndx] = odd_section
                    paragraph_ndx += 1
                is_review_section = 1

            elif (is_review_section):
                section = re.sub('<br \/>', '', section)
                review_paragraphs[paragraph_ndx] = section
                paragraph_ndx += 1

        # flush the final (or only) review in the file
        if (reviewer is not None):
            parsed_docs.append(document(author=reviewer,
                                        ratings=review_ratings,
                                        paragraphs=review_paragraphs,
                                        filename="%s-%s" % (review_file,
                                                            review_ndx)))

        return parsed_docs
コード例 #39
0
ファイル: canvas.py プロジェクト: Bhanditz/esl-sent-anal
 def wrappedindocument(self, file=None, **kwargs):
     """Build a one-page document from this canvas and hand it to ``method``.

     NOTE(review): ``method`` is a free variable bound in the enclosing
     decorator scope, which is not visible in this snippet.
     """
     single_page = document.page(self, **kwargs)
     doc = document.document([single_page])
     self.__name__ = method.__name__
     self.__doc__ = method.__doc__
     return method(doc, file)
コード例 #40
0
 def timetable(self, q, wildcards):
     """Redirect the client to the local timetable web page."""
     page = document(self.connection)
     # Only works in my setup
     target = "http://zimmer:5000/index.php?component=timetable&resolution=desktop"
     page.redirect(target)
     page.send()
コード例 #41
0
ファイル: commands.py プロジェクト: Glavin001/Siri-API
 def timetable(self, q, wildcards):
     """Send a redirect response pointing at the timetable page."""
     url = "http://zimmer:5000/index.php?component=timetable&resolution=desktop"
     response = document(self.connection)
     response.redirect(url)  # Only works in my setup
     response.send()
コード例 #42
0
 def hans(self, q, wildcards):
     """Greeting handler: echo the query and answer with a fixed reply."""
     reply = document(self.connection)
     reply.title("Welcome")
     # outgoing shows the user's utterance, incoming carries the reply
     reply.outgoing(q)
     reply.incoming("Hi Robin!")
     reply.send()
コード例 #43
0
 def no_action(self, q, wildcards):  # Is called if no action found
     """Fallback handler: apologize when the query matches no command.

     BUG FIX: the original called ``incoming(q)`` then ``outgoing(reply)``,
     inverting the convention used by every sibling handler (e.g. ``hans``
     and ``light``), where ``outgoing`` echoes the user's query and
     ``incoming`` carries the system's reply.
     """
     html = document(self.connection)
     html.title("Error")
     html.outgoing(q)
     html.incoming("Sorry, I don't know how to do that")
     html.send()
コード例 #44
0
def load_document(path, label, header_seperator='\n\n'):
    """Load the file at *path* and return it parsed as a document.

    :param path: filesystem path of the document file
    :param label: label attached to the resulting document
    :param header_seperator: delimiter between header and body sections
    :return: the parsed ``document`` instance
    """
    doc = document(path, label)
    with open(path, 'r') as handle:
        return doc.parser(handle, header_seperator)
コード例 #45
0
ファイル: canvas.py プロジェクト: dcf21/pyxplot7
 def wrappedindocument(self, file, *args, **kwargs):
     """Wrap this canvas in a single-page document and invoke ``method`` on it.

     NOTE(review): ``method`` comes from the enclosing decorator scope,
     not shown in this snippet.
     """
     page = document.page(self, *args, **kwargs)
     wrapped = document.document([page])
     self.__name__ = method.__name__
     self.__doc__ = method.__doc__
     return method(wrapped, file)