Example #1
0
def delete_files(project_id, user, paths):
    """Delete several files of a project after the project pre-check.

    ``before_project_service`` runs first; when it yields a non-``None``
    code, that code is returned as ``(code, None)`` and nothing is deleted.
    Otherwise ``File.delete_files`` is called and ``code_list.Success`` is
    returned.

    NOTE(review): the failure path returns a 2-tuple while the success path
    returns a bare code; callers must cope with both shapes.
    """
    error_code, _unused = before_project_service(project_id, user)
    if error_code is None:
        File.delete_files(project_id, paths)
        return code_list.Success
    return error_code, None
Example #2
0
async def file_upload(files: List[UploadFile] = File(...)):
    """Persist every uploaded file and return a per-file status list.

    For each upload the content is read, hashed with MD5, classified via
    ``FileType.parse_from_content_type`` and stored both as a ``FileM``
    record and through the configured ``FileProvider``. A
    ``file_uploaded`` event is published after a successful store.

    Returns ``response.response_ok`` wrapping one dict per file: code 200
    with the stored metadata on success, or code 400 (filename only) when
    saving raised ``NotUniqueError``.
    """
    result = []
    for file in files:
        try:
            data = file.file.read()
            md5 = hashlib.md5(data).hexdigest()

            file_type = FileType.parse_from_content_type(file.content_type)
            # Audio metadata is only extracted for audio uploads.
            file_data = (audio.get_audio_metadata(data)
                         if file_type == "audio" else {})

            # Only the text after the last dot is an extension, and only if
            # a dot exists: "README".split(".")[-1] would wrongly report the
            # whole filename as the extension.
            _, dot, suffix = file.filename.rpartition(".")
            extension = suffix if dot else ""

            provider = Config.get("file_provider")

            model = FileM(md5=md5,
                          extension=extension,
                          file_type=file_type,
                          provider=provider,
                          metadata=file_data)

            # Save the record first; a duplicate raises NotUniqueError
            # before any bytes are handed to the provider.
            model.save()
            FileProvider.get_instance(provider).write(md5, data)

            res = {
                "file_id": str(model.id),
                "filename": file.filename,
                "md5": md5,
                "filetype": file_type,
                "data": file_data,
                "code": 200
            }

            result.append(res)

            chan.publish("file_uploaded", {
                "model": model,
                "result": res,
                "raw": data
            })

        except NotUniqueError:
            # The record could not be saved as unique: report the file as
            # rejected and continue with the remaining uploads.
            result.append({"filename": file.filename, "code": 400})

    return response.response_ok(result)
Example #3
0
def to_file(date, mars_class):
    """Build the ``File`` record for the monthly download of ``date``.

    The filename is ``date`` formatted as ``%Y%m``; the path embeds that
    name and ``mars_class``. Priority is always 1 and ``modify_date`` is
    ``date`` itself.
    """
    year_month = date.strftime('%Y%m')
    grib_path = "downloads/cams.{}.{}.grib".format(year_month, mars_class)
    return File(
        filename=year_month,
        path=grib_path,
        priority=1,
        modify_date=date,
    )
Example #4
0
 def get_database(config, setting):
     """Return the storage backend selected by ``config``.

     ``"file"`` selects ``File``; every other value, including ``"mongo"``,
     selects ``Mongo``. The backend is constructed with ``setting``.
     """
     backend = File if config == "file" else Mongo
     return backend(setting)
def test_delete_album(app, json_albums):
    """Delete the album described by ``json_albums`` and verify the result.

    Albums with a blank or empty name are skipped. The album is created
    first when missing, any images it holds are moved to the album with
    an empty name, and then it is deleted.
    """
    # NOTE: an earlier variant used a `data_albums` fixture instead of
    # `json_albums`.
    if json_albums.name.isspace() or not json_albums.name:
        return
    albums = [json_albums]

    # Create whatever is reported missing so there is something to delete.
    for missing in app.album.missing_albums(albums):
        placeholder = Album(name=missing.name,
                            privacy="private_but_link",
                            description="album for delete")
        app.album.create(placeholder)

    albums_to_delete = app.album.info_about_albums_by_name(albums)
    images = app.file.get_images_in_albums(albums_to_delete)

    if images:
        # Move the images out of the albums before deleting them.
        files_for_move = File(list_name=[image.name for image in images])
        app.album.move_to_album([Album(name="")], files_for_move)

    app.album.delete(albums_to_delete)
    remaining = app.album.get_album_list()
    removed = app.album.difference_in_lists_album(albums, remaining)
    assert removed == albums
    assert len(remaining) == app.album.count_ui_albums()
def fetch_cover(model):
    """
        Extract the cover from the first audio file of ``model`` and attach
        it to the song and to its album wherever `cover_files` is empty.

        Does nothing when the model has no audio files or when no cover
        can be extracted.
    """
    if len(model.audio_files) == 0:
        return

    cover = audio.get_audio_cover(model.audio_files[0].read())
    if not cover:
        return

    image_bytes = cover["data"]
    # The file extension is the subtype part of the MIME type.
    extension = cover["mime_type"].split("/")[-1]
    image_info = {
        "height": cover["height"],
        "width": cover["width"],
        "mime_type": cover["mime_type"],
        "description": cover["description"]
    }
    cover_model = File.create(image_bytes, extension, "image", image_info)

    # Song first, then its album; each is only touched when it has no cover.
    if len(model.cover_files) == 0:
        model.cover_files.append(cover_model)
        model.save()

    album = model.album
    if len(album.cover_files) == 0:
        album.cover_files.append(cover_model)
        album.save()
Example #7
0
def get_file_with_current_path(list_path_to_file):
    """Return a ``File`` whose ``list_name`` holds each given path joined
    onto the directory two levels above this module's file."""
    base_dir = Path(__file__).parent.parent
    joined = [os.path.join(base_dir, relative)
              for relative in list_path_to_file]
    return File(list_name=joined)
Example #8
0
def get_all_files_with_current_path(folder):
    """Return a ``File`` listing the full path of every regular file found
    directly inside ``folder``.

    ``folder`` is resolved against the directory two levels above this
    module's file; sub-directories are not included.
    """
    folder_path = os.path.join(Path(__file__).parent.parent, folder)
    full_paths = [
        os.path.join(folder_path, entry)
        for entry in listdir(folder_path)
        if isfile(join(folder_path, entry))
    ]
    return File(list_name=full_paths)
def extract_file_name(files):
    """Return a ``File`` whose ``list_name`` holds, for each entry of
    ``files.list_name``, the base name with its last extension removed."""
    stems = [
        os.path.splitext(os.path.basename(entry))[0]
        for entry in files.list_name
    ]
    return File(list_name=stems)
Example #10
0
def download_file(project_id, user, path):
    """Resolve the download URL for ``path`` inside a project.

    Returns ``(code, url)``: the pre-check code with ``None`` when
    ``before_project_service`` reports one, ``(code_list.Success, url)``
    when ``File.download_file`` yields a truthy URL, and
    ``(code_list.FileNoExists, None)`` otherwise.
    """
    error_code, _unused = before_project_service(project_id, user)
    if error_code is not None:
        return error_code, None

    url = File.download_file(project_id, path)
    if not url:
        return code_list.FileNoExists, None
    return code_list.Success, url
Example #11
0
def to_file(file_tuple, prefix_path):
    """Turn a ``(path, meta_information)`` pair into a ``File`` record.

    The stored path is ``prefix_path`` followed by ``path``. Entries whose
    path ends with ``Beschreibung_Stationen.txt`` get priority 1, all
    others priority 0.

    NOTE(review): ``path`` must offer both ``.endswith`` and ``.name``;
    neither plain ``str`` nor ``pathlib`` paths provide both, so confirm
    the type supplied by callers.
    """
    path, meta_information = file_tuple
    full_path = '{0}{1}'.format(prefix_path, path)
    if path.endswith('Beschreibung_Stationen.txt'):
        priority = 1
    else:
        priority = 0
    return File(
        filename=path.name,
        path=full_path,
        meta_information=meta_information,
        priority=priority,
    )
Example #12
0
def delete_file(project_id, user, path):
    """Delete a single file of a project after the project pre-check.

    Returns ``(code, None)`` when ``before_project_service`` reports a
    code, ``code_list.FileNoExists`` when ``File.delete_file`` returns
    ``None``, and ``code_list.Success`` otherwise.

    NOTE(review): the pre-check failure is a 2-tuple while the other
    results are bare codes.
    """
    error_code, _unused = before_project_service(project_id, user)
    if error_code is not None:
        return error_code, None

    outcome = File.delete_file(project_id, path)
    return code_list.FileNoExists if outcome is None else code_list.Success
Example #13
0
    def create_from_json(body: dict, download=False):
        """Build a ``Module`` from its JSON description.

        ``body`` supplies ``id``, ``name``, ``url`` and ``contents``. Each
        content entry is converted with
        ``File.create_file_or_linkablecontent``; entries for which that
        returns ``None`` are left out of the module.
        """
        module = Module(body['id'], body['name'], body['url'], download)

        # Lazy generator: each entry is converted right before it is added.
        converted = (File.create_file_or_linkablecontent(raw, download)
                     for raw in body['contents'])
        for item in converted:
            if item is None:
                continue
            module.add_content(item)

        return module
Example #14
0
    def feed(self):
      """Render the stored file whose id is given in ``self.params['id']``.

      Responds with a 404 when the id is missing/empty or when no file
      is found for it.
      """
      file_id = self.params['id']
      # Only hit the datastore when an id was actually supplied.
      record = File.get_by_id(int(file_id)) if file_id else None
      if not record:
        self.error(404)
        return

      self.render(binary=record.body,content_type=str(record.content_type))
Example #15
0
    def __init__(self, url, mime, image):
        ''' Constructor.

        Stores the upstream URL, wraps the raw ``image`` bytes in a
        ``File`` named after the last path component of ``url``, sets
        both start and end date to the current time, and builds a JPEG
        thumbnail bounded by ``THUMB_SIZE``.
        '''

        parsed = urllib.parse.urlparse(url)
        name = os.path.basename(parsed.path)

        self.upstream_url = url
        self.file = File(name, mime, image)
        now = datetime.now()
        self.start_date = now
        self.end_date = now

        thumb = Image.open(io.BytesIO(image))
        # Handle files that are in palette rather than RGB mode
        if thumb.mode != 'RGB':
            thumb = thumb.convert('RGB')
        thumb.thumbnail(THUMB_SIZE)

        # Encode into a fresh buffer. Writing the JPEG over the start of
        # the source buffer (as was done before) left the tail of the
        # original image appended whenever the JPEG was shorter.
        encoded = io.BytesIO()
        thumb.save(encoded, format='JPEG')
        # NOTE(review): the thumbnail is always JPEG but is stored with the
        # original ``mime``; confirm whether that is intended.
        self.thumb_file = File('thumb-{}'.format(name), mime,
                               encoded.getvalue())
Example #16
0
    def upload(self):
      """Store the file posted in the ``up_file`` field and render a JSON status.

      GET requests return immediately without rendering. A missing or
      empty upload and uploads of 1 MB or more produce an error status;
      anything else is saved as a ``File`` record.
      """
      if self.request.method.upper() == "GET":
        return

      file_data = self.request.get("up_file")
      res = {'status':'success','msg':"アップロードが完了しました"}

      # The original check compared the undefined name `file` with None, so
      # it never rejected anything; test the uploaded payload instead.
      if not file_data:
        res = {'status':'error',"msg":"ファイルが不正です"}
      else:
        length = len(file_data)
        if length >= (1 * 1024 * 1024):
          # Report the size in whole kilobytes.
          size_kb = length // 1024
          msg = "ファイルサイズが大きすぎます(%sKB)。1MB以下にしてください" % str(size_kb)
          res = {'status': 'error','msg': msg}
        else:
          # Set values
          upload_field = self.request.body_file.vars['up_file']
          content_type = upload_field.headers['Content-Type']
          name = upload_field.filename.decode('utf-8')
          rec = File(filename=name,orginal_filename=name,content_type=content_type,content_length=length,body=file_data)
          rec.put()

      self.render(json=self.to_json(res))
Example #17
0
def upload_file(project_id, user, file, path):
    """Upload ``file`` into a project under ``path``.

    Returns the pre-check code when ``before_project_service`` reports
    one, ``code_list.Success`` when ``File.upload_file`` returns a truthy
    value, and ``code_list.OtherError`` otherwise. A falsy ``file`` is
    forwarded with an empty filename and a falsy ``path`` as ``""``. The
    upload is tagged with the uploading user's id.
    """
    error_code, _unused = before_project_service(project_id, user)
    if error_code is not None:
        return error_code

    if file:
        filename = file.filename
    else:
        filename = ''
    target_path = path or ""

    stored = File.upload_file(project_id=project_id,
                              file=file,
                              path=target_path,
                              filename=filename,
                              tag="upload=%s" % user.id)
    return code_list.Success if stored else code_list.OtherError
Example #18
0
def populate_config(conn: Connection, config: Config):
    """
    Rebuild the course tree stored for this config and attach it to it.
    Courses contain sections, sections contain modules, and modules
    contain linkable contents and files.
    THIS FUNCTION MUTATES THE config PARAM
    :param conn: DB connection
    :param config: Moodle configuration
    """
    # TODO test this code
    for course_row in get_config_courses(conn, config.id):
        course = Course.create_from_db(course_row)

        for section_row in get_course_sections(conn, config.id, course.id):
            section = Section.create_from_db(section_row)

            # Linkable contents that hang directly off the section.
            section.modules = __get_linkablecontents(conn, config.id,
                                                     section.id, True)

            for module_row in get_section_modules(conn, config.id,
                                                  section.id):
                module = Module.create_from_db(module_row)

                # Linkable contents that belong to the module.
                module.contents = __get_linkablecontents(
                    conn, config.id, module.id, False)

                # Files of the module.
                # NOTE(review): the result of create_file_or_linkablecontent
                # is added without a None check here.
                for file_row in get_module_files(conn, config.id, module.id):
                    module.add_content(
                        File.create_file_or_linkablecontent(file_row))

                section.add_module(module)

            course.add_section(section)

        config.add_course(course)
Example #19
0
def test_move_random(app):
    """Move three random files into a random album and check that each
    file afterwards reports that album's id.

    When no album exists, one is created with a random name and privacy
    and then re-read by name.
    """
    picked_files = app.random_existing_items(item="file", random_number=3)
    files_name = File(list_name=[entry.name for entry in picked_files])
    album = app.random_existing_items(item="album", random_number=1)

    if not album:
        album_name = random_string(max_len_str=10)
        album_privacy = random_existing_item(["public", "private_but_link"])
        new_album = Album(name=album_name, privacy=album_privacy)
        app.album.create(new_album)
        album = app.album.info_about_albums_by_name([new_album])

    app.album.move_to_album(album=album, file=files_name)

    for moved in app.file.get_info_about_file(files_name):
        assert moved.id_album == album[0].id_album
Example #20
0
    def delete(self):
      """Delete the file given by ``self.params['id']`` and render a JSON status.

      GET requests return immediately. A missing/empty id answers 404.
      Otherwise the JSON status reports success when the file was found
      and deleted, or an error when no such file exists.
      """
      if self.request.method.upper() == "GET":
        return

      file_id = self.params['id']
      if not file_id:
        self.error(404)
        return

      record = File.get_by_id(int(file_id))
      if record:
        record.delete()
        res = {"status":"success","msg":"削除しました"}
      else:
        res = {"status":"error","msg":"削除に失敗しました"}

      self.render(json=self.to_json(res))
Example #21
0
 def get_file_list(self):
     """Open the images page and return one ``File`` per ``.list-item``
     element, populated from the element's ``data-*`` attributes."""
     wd = self.app.wd
     self.app.navigation.open_images()
     return [
         File(id_file=element.get_attribute("data-id"),
              name=element.get_attribute("data-title"),
              description=element.get_attribute("data-description"),
              id_album=element.get_attribute("data-album-id"),
              privacy=element.get_attribute("data-privacy"))
         for element in wd.find_elements_by_css_selector(".list-item")
     ]
Example #22
0
    def fetch_all_meta_information(self, path=None):
        """Collect a ``File`` for every non-directory entry below ``path``.

        Logs in to ``self.server``, lists ``path`` with MLSD and recurses
        into entries for which both ``is_directory`` and
        ``is_main_directory`` hold. Each recursion level opens its own
        connection.

        :param path: remote directory to list; its value is concatenated
            directly with entry names, so it is expected to end with "/".
        :return: list of ``File`` objects; empty when ``path`` is ``None``.
        """
        if path is None:
            return []

        files = []
        # The context manager closes the connection even when login, cwd,
        # the listing or a nested call raises, so no socket is leaked.
        with FTP(self.server) as ftp:
            ftp.login(self.username, self.password)
            ftp.cwd(path)

            for entry in ftp.mlsd():
                folder, meta_information = entry
                if is_directory(entry):
                    if is_main_directory(entry):
                        files.extend(
                            self.fetch_all_meta_information('{0}{1}/'.format(
                                path, folder)))
                else:
                    files.append(
                        File('{0}{1}'.format(path, folder), meta_information))
        return files
Example #23
0
def test_delete_random_album(app):
    """Delete up to two random albums and verify they are gone.

    When no album exists, one is created with random data and used as the
    album to delete. Images inside the chosen albums are first moved to
    the album with an empty name.
    """
    random_albums = app.random_existing_items(item="album", random_number=2)

    if not random_albums:
        new_album = Album(name=random_string(10),
                          description=random_string(50),
                          privacy=random_existing_item(["public",
                                                        "private_but_link"]))
        app.album.create(new_album)
        # Re-read the created album so it is the one deleted below;
        # previously the list stayed empty and nothing was deleted.
        random_albums = app.album.info_about_albums_by_name([new_album])

    list_files_in_random_albums = app.file.get_images_in_albums(random_albums)

    if list_files_in_random_albums:
        files_for_move = File(
            list_name=[x.name for x in list_files_in_random_albums])
        app.album.move_to_album([Album(name="")], files_for_move)

    app.album.delete(random_albums)
    new_list_albums = app.album.get_album_list()
    diff_items = app.album.difference_in_lists_album(random_albums,
                                                     new_list_albums)
    # An order-insensitive comparison (sorting both lists by name) was
    # considered here; the lists are currently compared as-is.
    assert diff_items == random_albums
    assert len(new_list_albums) == app.album.count_ui_albums()
Example #24
0
class TestFile(TestCase):
    """Tests for the ``File`` client: execution counter, stored HTML
    snapshot and per-selector diff counters."""

    def setUp(self):
        # Every test starts with one stored HTML snapshot and one diff.
        self.client = File("")
        self.client.insert_previous_html(
            "<html><body><h1>TEST</h1></body></html>")
        self.client.insert_previous_diff("html > body > h1")

    def tearDown(self):
        self.client.drop()

    def test_get_exec_count(self):
        count = self.client.get_exec_count()
        self.assertEqual(1, count)

    def test_get_previous_html(self):
        stored = self.client.get_previous_html()
        self.assertEqual("<html><body><h1>TEST</h1></body></html>",
                         stored["html"])

    def test_update_exec_count(self):
        self.client.update_exec_count()
        self.assertEqual(2, self.client.get_exec_count())

    def test_update_previous_html(self):
        self.client.update_previous_html(
            "<html><body><h1>TEST</h1><h2>TEST2</2></body></html>")
        stored = self.client.get_previous_html()
        self.assertEqual(
            "<html><body><h1>TEST</h1><h2>TEST2</2></body></html>",
            stored["html"])

    def test_find_diff_from_previous(self):
        entry = self.client.find_diff_from_previous("html > body > h1")
        self.assertEqual(1, entry["id"])
        self.assertEqual("html > body > h1", entry["diff"])
        self.assertEqual(1, entry["count"])

    def test_find_diff_from_previous_empty(self):
        self.assertIsNone(self.client.find_diff_from_previous("invalid key"))

    def test_insert_previous_diff(self):
        # Unknown before the insert, present afterwards.
        self.assertIsNone(self.client.find_diff_from_previous("key1"))
        self.client.insert_previous_diff("key1")
        self.assertIsNotNone(self.client.find_diff_from_previous("key1"))

    def test_update_previous_diff(self):
        before = self.client.find_diff_from_previous("html > body > h1")
        self.assertEqual(1, before["count"])
        self.client.update_previous_diff("html > body > h1")

        after = self.client.find_diff_from_previous("html > body > h1")
        self.assertEqual(2, after["count"])

    def test_insert_or_update_diff_with_new_diff(self):
        entry = self.client.insert_or_update_diff("key1")
        self.assertEqual("key1", entry["diff"])
        self.assertEqual(1, entry["count"])

    def test_insert_or_update_diff_with_exist_diff(self):
        entry = self.client.insert_or_update_diff("html > body > h1")
        self.assertEqual("html > body > h1", entry["diff"])
        self.assertEqual(2, entry["count"])
Example #25
0
def get_file_list(project_id, user, prefix):
    """List a project's files under ``prefix``.

    Returns ``(code, None)`` when ``before_project_service`` reports a
    code, otherwise ``(code_list.Success, listing)`` with the result of
    ``File.get_file_list``.
    """
    error_code, _unused = before_project_service(project_id, user)
    if error_code is None:
        listing = File.get_file_list(project_id=project_id, prefix=prefix)
        return code_list.Success, listing
    return error_code, None
Example #26
0
class MainController(SheetController, AudioController):
    """Controller that combines sheet editing and audio playback.

    It owns the current ``File`` (``self._curFile``), exposes its tracks
    and BPM, remembers the selected instrument, and plays or exports the
    file through MIDI.
    """

    def __init__(self):
        SheetController.__init__(self)
        AudioController.__init__(self)
        self.newFile()
        self._selectedInst = None
        self._state = STATE.EDITING

    # Track part
    def getTrack(self, trackID):
        '''Return the track stored under ``trackID``.

        Raises ValueError when the current file has no such track.
        '''
        if trackID not in self._curFile.tracks:
            raise ValueError('Track ID {} not found when get'.format(trackID))
        return self._curFile.tracks[trackID]

    def getTrackIDList(self):
        '''Return the keys view of the current file's tracks.'''
        return self._curFile.tracks.keys()

    def getTrackNum(self):
        '''Return the number of tracks in the current file.'''
        return len(self._curFile.tracks)

    def getTrackInst(self, trackID):
        '''Return the instrument of the track ``trackID``.'''
        return self.getTrack(trackID).inst

    def getTrackVel(self, trackID):
        '''Return the velocity of the track ``trackID``.'''
        return self.getTrack(trackID).vel

    def setCurTrack(self, trackID):
        '''Switch to the track ``trackID``; ``None`` clears the current track.'''
        if trackID is None:
            self._curTrack = None
            self._curTrackID = None
        else:
            self._switchTrack(trackID, self.getTrack(trackID))

    def setTrackInst(self, trackID, inst):
        '''Set the instrument of a track.

        Raises ValueError when ``inst`` is not in ``INSTRUMENT``.
        '''
        if inst not in INSTRUMENT:
            raise ValueError('Instrument {} not found'.format(inst))
        self.getTrack(trackID).inst = inst

    def setTrackVel(self, trackID, vel):
        '''Set the velocity of a track.

        Raises ValueError when ``vel`` is not an int inside ``VEL_RANGE``.
        '''
        if not isinstance(vel, int) or vel not in VEL_RANGE:
            raise ValueError(
                'Velocity must be int within 0~127, not {}'.format(vel))
        self.getTrack(trackID).vel = vel

    def addTrack(self, inst=0, vel=100):
        '''Add a track with instrument ``inst`` (default 0, piano) and
        velocity ``vel`` (default 100); return what the file's
        ``addTrack`` returns.

        Raises OverflowError when ``MAX_TRACK_NUM`` tracks already exist.
        '''
        if self.getTrackNum() >= MAX_TRACK_NUM:
            raise OverflowError(
                'Track numbers cannot exceed {}'.format(MAX_TRACK_NUM))
        return self._curFile.addTrack(inst, vel)

    def delTrack(self, trackID):
        '''Delete the track ``trackID`` and reset the state to DEFAULT.

        Raises ValueError when the current file has no such track.
        '''
        if trackID not in self._curFile.tracks:
            raise ValueError('Track ID {} not found when del'.format(trackID))
        if trackID == self._curTrackID:
            self.setCurTrack(None)
        # Observers are notified both before and after the removal.
        self.notify()
        self._curFile.delTrack(trackID)
        self.notify()
        self._state = STATE.DEFAULT

    def getAnyTrackNotesInfo(self, trackID, keys, on, off):
        """ Fetches info of all notes in track ``trackID`` satisfying that
            1) note.key is in list of keys;
            2) the interval [note.on, note.off) has non-empty intersection with [on,off).

        Keyword Args:
            trackID: ID of the track to search.
            keys (<list>int): the list of 'key' property.
            on (int): the start time of the interval.
            off (int): the end time of the interval.

        ``on`` and ``off`` are converted with ``_toTick`` before the search
        and the returned 'On'/'Off' values are converted with ``_toSec``.

        Return: a list-dict structure. For example:
                [noteInfo1,noteInfo2,noteInfo3]
            where
                noteInfo1 = {'Key':18,'Velocity':100,'On':13,'Off':14},
                noteInfo2 = {'Key':19,'Velocity':100,'On':14,'Off':15},
                noteInfo3 = {'Key':16,'Velocity':100,'On':15,'Off':16}.

        P.S.
            Notes are not necessarily contained in the area.
        """
        track = self.getTrack(trackID)
        noteIDList = track.search(on=self._toTick(on),
                                  off=self._toTick(off),
                                  keys=keys)
        noteInfoList = []
        for noteID in noteIDList:
            note = track.getNote(noteID)
            noteInfoList.append({
                'Key': note.key,
                'Velocity': note.vel,
                'On': self._toSec(note.on),
                'Off': self._toSec(note.off),
            })
        return noteInfoList

    # File part
    def setBPM(self, bpm):
        '''Set the BPM of the current file.

        Raises ValueError when ``bpm`` is not an int within 0~200.
        '''
        if not isinstance(bpm, int) or bpm < 0 or bpm > 200:
            raise ValueError(
                'bpm must be int within 0~200, not {}'.format(bpm))
        self._curFile.bpm = bpm

    def getBPM(self):
        '''Return the BPM of the current file.'''
        return self._curFile.bpm

    def newFile(self):
        '''Replace the current file with a new one at ``DEFAULT_BPM``.'''
        self._curFile = File(bpm=DEFAULT_BPM)

    def saveFile(self, fileName='temp.nm'):
        '''use midi instead!

        Intentionally a no-op that returns ``None``. The former
        pickle-based body sat after an unconditional ``return`` and was
        never executed, so it has been removed.
        '''
        return

    def loadFile(self, fileName='temp.nm'):
        '''use midi instead!

        Intentionally a no-op that returns ``None``. The former
        pickle-based body sat after an unconditional ``return`` and was
        never executed, so it has been removed.
        '''
        return

    def export(self, fileType, fileName):
        '''
        fileType should be str, current support 'wav' and 'mid'

        'mid' saves the MIDI data to ``fileName``; 'wav' saves it to an
        in-memory buffer and hands that to ``_getSample`` with
        ``export=True``. Any other value raises NotImplementedError.
        '''
        mid = self._curFile.toMidi()
        if fileType == 'mid':
            mid.save(fileName)
        elif fileType == 'wav':
            buf = BytesIO()
            mid.save(file=buf)
            self._getSample(buf, mid.length, export=True, filename=fileName)
        else:
            raise NotImplementedError

    # Selection part
    def getSelectedInst(self):
        '''Return the currently selected instrument (``None`` initially).'''
        return self._selectedInst

    def setSelectedInst(self, inst):
        '''Remember ``inst`` as the selected instrument.'''
        self._selectedInst = inst

    # Play part
    def playAll(self):
        '''read all tracks and play'''
        mid = self._curFile.toMidi()
        buf = BytesIO()
        mid.save(file=buf)
        self._play(buf, mid.length)

    def playSingle(self, key):
        '''use in piano roll'''
        self._playSingle(self._curTrack.inst, key)

    def pauseAll(self):
        '''pause all, including playAll and playTrack'''
        self._pause()

    def playTrack(self, trackID):
        '''read certain track and play'''
        mid = self.getTrack(trackID).toMidi(self.getBPM())
        buf = BytesIO()
        mid.save(file=buf)
        self._play(buf, mid.length)
    # Demo part
    """
Example #27
0
 def index(self):
   """Store on ``self.files`` the query for all ``File`` records ordered
   by ``'-uploaded_at'``."""
   all_files = File.all()
   self.files = all_files.order('-uploaded_at')
Example #28
0
    def get_paper(self, paper_url=None, paper_id=None):
        """
        Load paper details for the paper given by detail page URL
        or numeric ID
        """
        paper_url = ('%svo020.asp?VOLFDNR=%s' %
                     (self.config['scraper']['base_url'], paper_id))
        logging.info("Getting paper %d from %s", paper_id, paper_url)

        # Stupid re-try concept because AllRis sometimes misses
        # start < at tags at first request.
        try_counter = 0
        while True:
            try:
                response = self.get_url(paper_url)
                if not response:
                    return
                if "noauth" in response.url:
                    logging.warn("Paper %s in %s seems to private", paper_id,
                                 paper_url)
                    return
                text = response.text
                doc = html.fromstring(text)
                data = {}

                # Beratungsfolge-Table checken
                # lets hope we always have this table
                table = self.table_css(doc)[0]
                self.consultation_list_start = False
                last_headline = ''
                for line in table:
                    if line.tag == 'tr':
                        headline = line[0].text
                    elif line.tag == 'td':
                        headline = line.text
                    else:
                        logging.error("ERROR: Serious error in data table. "
                                      "Unable to parse.")
                    if headline:
                        headline = headline.split(":")[0].lower()
                        if headline[-1] == ":":
                            headline = headline[:-1]
                        if headline == "betreff":
                            value = line[1].text_content().strip()
                            # There is some html comment with a script
                            # tag in front of the text which we remove.
                            value = value.split("-->")[1]
                            # remove all multiple spaces from the string
                            data[headline] = " ".join(value.split())
                        elif headline in [
                                'verfasser', u'federführend', 'drucksache-art'
                        ]:
                            data[headline] = line[1].text.strip()
                        elif headline in ['status']:
                            data[headline] = line[1].text.strip()
                            # related papers
                            if len(line) > 2:
                                if len(line[3]):
                                    # Gets originalId. is there something
                                    # else at this position? (will break)
                                    paper_id = line[3][0][0][1][0].get(
                                        'href').split('=')[1].split('&')[0]
                                    data['relatedPaper'] = [
                                        Paper(originalId=paper_id)
                                    ]

                        # Lot's of scraping just because of the date (?)
                        elif headline == "beratungsfolge":
                            # The actual list will be in the next row
                            # inside a table, so we only set a marker.
                            self.consultation_list_start = True
                        elif self.consultation_list_start:
                            elem = line[0][0]
                            # The first line is pixel images, so skip
                            # it, then we need to jump in steps of two.
                            amount = (len(elem) - 1) / 2
                            consultations = []
                            date_list = []
                            i = 0
                            item = None
                            for elem_line in elem:
                                if i == 0:
                                    i += 1
                                    continue
                                """
                                Here we need to parse the actual list which can have different forms. A complex example
                                can be found at http://ratsinfo.aachen.de/bi/vo020.asp?VOLFDNR=10822
                                The first line is some sort of headline with the committee in question and the type of consultation.
                                After that 0-n lines of detailed information of meetings with a date, transscript and decision.
                                The first line has 3 columns (thanks to colspan) and the others have 7.

                                Here we make every meeting a separate entry, we can group them together later again if we want to.
                                """

                                # now we need to parse the actual list
                                # those lists
                                new_consultation = Consultation()
                                new_consultation.status = \
                                        elem_line[0].attrib['title'].lower()
                                if len(elem_line) == 3:
                                    # The order is "color/status", name of
                                    # committee / link to TOP, more info we
                                    # define a head dict here which can be
                                    # shared for the other lines once we find
                                    # another head line we will create a new
                                    # one here.
                                    new_consultation.role = \
                                            elem_line[2].text.strip()

                                    # Name of committee, e.g.
                                    # "Finanzausschuss", unfort. without id
                                    #'committee' : elem_line[1].text.strip(),
                                # For some obscure reasons sometimes action
                                # is missing.
                                elif len(elem_line) == 2:
                                    # The order is "color/status", name of
                                    # committee / link to TOP, more info.
                                    status = \
                                            elem_line[0].attrib['title'].lower()
                                    # We define a head dict here which can be
                                    # shared for the other lines once we find
                                    # another head line we will create a new
                                    # one here.
                                    # name of committee, e.g.
                                    # "Finanzausschuss", unfort. without id
                                    #'committee' : elem_line[1].text.strip(),
                                elif len(elem_line) == 7:
                                    try:
                                        # This is about line 2 with lots of
                                        # more stuff to process.
                                        # Date can be text or a link with that
                                        # text.
                                        # We have a link (and ignore it).
                                        if len(elem_line[1]) == 1:
                                            date_text = elem_line[1][0].text
                                        else:
                                            date_text = elem_line[1].text
                                        date_list.append(
                                            datetime.datetime.strptime(
                                                date_text.strip(), "%d.%m.%Y"))
                                        if len(elem_line[2]):
                                            # Form with silfdnr and toplfdnr
                                            # but only in link (action=
                                            #   "to010.asp?topSelected=57023")
                                            form = elem_line[2][0]
                                            meeting_id = form[0].attrib[
                                                'value']
                                            new_consultation.meeting = [
                                                Meeting(originalId=meeting_id)
                                            ]
                                            # Full name of meeting, e.g.
                                            # "A/31/WP.16 öffentliche/
                                            #   nichtöffentliche Sitzung des
                                            # Finanzausschusses"
                                            #item['meeting'] = \
                                            #    elem_line[3][0].text.strip()
                                        else:
                                            # No link to TOP. Should not be
                                            # possible but happens.
                                            #   (TODO: Bugreport?)
                                            # Here we have no link, but the
                                            # text is in the TD directly - it
                                            # will be scraped as meeting.
                                            #item['meeting'] = \
                                            #    elem_line[3].text.strip()
                                            logging.warn(
                                                "AgendaItem in consultation "
                                                "list on the web page does not "
                                                "contain a link to the actual "
                                                "meeting at paper %s",
                                                paper_url)
                                        toplfdnr = None
                                        if len(elem_line[6]) > 0:
                                            form = elem_line[6][0]
                                            toplfdnr = form[0].attrib['value']
                                        if toplfdnr:
                                            new_consultation.originalId = \
                                                    "%s-%s" % (toplfdnr,
                                                               paper_id)
                                            # actually the id of the transcript
                                            new_consultation.agendaItem = \
                                                    AgendaItem(
                                                        originalId=toplfdnr)
                                            # e.g. "ungeändert beschlossen"
                                            new_consultation.agendaItem.result \
                                                    = elem_line[4].text.strip()
                                            consultations.append(
                                                new_consultation)
                                        else:
                                            logging.error(
                                                "missing agendaItem ID in "
                                                "consultation list at %s",
                                                paper_url)
                                    except (IndexError, KeyError):
                                        logging.error(
                                            "ERROR: Serious error in "
                                            "consultation list. Unable to "
                                            "parse.")
                                        logging.error(
                                            "Serious error in consultation "
                                            "list. Unable to parse.")
                                        return []
                                i += 1
                            # Theory: we don't need this at all, because it's
                            # scraped at meeting.
                            #data['consultations'] = consultations
                            # set the marker to False again as we have read it
                            self.consultation_list_start = False
                    last_headline = headline
                    # We simply ignore the rest (there might not be much more
                    # actually).
                # The actual text comes after the table in a div, but it's not
                # valid XML or HTML, so we extract it using a regex.
                data['docs'] = self.body_re.findall(response.text)
                first_date = False
                for single_date in date_list:
                    if first_date:
                        if single_date < first_date:
                            first_date = single_date
                    else:
                        first_date = single_date
                paper = Paper(originalId=paper_id)
                paper.originalUrl = paper_url
                paper.name = data['betreff']
                paper.description = data['docs']
                if 'drucksache-art' in data:
                    paper.paperType = data['drucksache-art']
                if first_date:
                    paper.publishedDate = first_date.strftime("%d.%m.%Y")
                # see theory above
                #if 'consultations' in data:
                #    paper.consultation = data['consultations']
                paper.auxiliaryFile = []
                # get the attachments step 1 (Drucksache)
                file_1 = self.attachment_1_css(doc)
                if len(file_1):
                    if file_1[0].value:
                        href = ('%sdo027.asp' %
                                self.config['scraper']['base_url'])
                        original_id = file_1[0].value
                        name = 'Drucksache'
                        main_file = File(originalId=original_id, name=name)
                        main_file = self.get_file(main_file, href, True)
                        paper.mainFile = main_file
                # get the attachments step 2 (additional attachments)
                files = self.attachments_css(doc)
                if len(files) > 0:
                    if len(files[0]) > 1:
                        if files[0][1][0].text.strip() == "Anlagen:":
                            for tr in files[0][2:]:
                                link = tr[0][0]
                                href = ("%s%s" %
                                        (self.config['scraper']['base_url'],
                                         link.attrib["href"]))
                                name = link.text
                                path_tokens = link.attrib["href"].split('/')
                                original_id = "%d-%d" % (int(
                                    path_tokens[4]), int(path_tokens[6]))
                                aux_file = File(originalId=original_id,
                                                name=name)
                                aux_file = self.get_file(aux_file, href)
                                paper.auxiliaryFile.append(aux_file)
                print paper.auxiliaryFile
                if not len(paper.auxiliaryFile):
                    del paper.auxiliaryFile
                oid = self.db.save_paper(paper)
                return
            except (KeyError, IndexError):
                if try_counter < 3:
                    logging.info("Try again: Getting paper %d from %s",
                                 paper_id, paper_url)
                    try_counter += 1
                else:
                    logging.error("Failed getting paper %d from %s", paper_id,
                                  paper_url)
                    return
def replace_name(file):
    """Build a new File whose names use hyphens instead of underscores.

    Every entry of ``file.list_name`` has each "_" replaced by "-", and the
    converted names are passed as ``list_name`` to a freshly created File.
    """
    hyphenated = [entry.replace("_", "-") for entry in file.list_name]
    return File(list_name=hyphenated)
Example #30
0
 def to_file(row):
     """Turn a two-item ``(date, path)`` row into a File.

     The first item becomes ``modify_date`` and the second becomes ``path``.
     """
     modified_on, location = row
     return File(path=location, modify_date=modified_on)
Example #31
0
from model.file import File
from model.album import Album

# Each case is a three-item list: a File (or None), an Album, and a third
# value that is either None or "all".
testdata = [
    [
        File(list_name=["atom", "ae", "179px"]),
        Album(name="random albums name"),
        None,
    ],
    [
        File(list_name=["ledy", "639px", "atom"]),
        Album(name=""),
        None,
    ],
    [
        None,
        Album(name="2 album for move files"),
        "all",
    ],
    [
        None,
        Album(name=""),
        "all",
    ],
]
Example #32
0
from model.file import File

# One File per sample file name, built in the order listed below.
testdata = [
    File(dir=sample_name)
    for sample_name in (
        'tnc_69881045.jpg',
        'archive.rar',
        'archive.zip',
        'video.avi',
        'video.mkv',
        'video.mp4',
        'video.mpg',
        'image.bmp',
        'image.jpg',
        'image.tif',
        'book.fb2',
        'audio.mp3',
        'table.csv',
        'table.xlsx',
        'text.docx',
        'text.html',
        'text.pdf',
        'text.rtf',
        'text.txt',
    )
]
    try:
        file = open(path, 'r')
        lines = file.read().split('\n')
        file.close()
        location = os.path.join(os.path.abspath(path))
        re_match = re.search(filename_regex, path)
        filename = re_match.group()

    except (OSError, IOError):
        print(f'File: {os.path.join(os.path.abspath(path))} not found.')
        lines = []

    return (filename, location, lines)


# Collect every file name under the sample source tree.
filenames = get_filenames('sample-es6/src')

# read_file yields a (filename, location, lines) triple; wrap each one in a
# File, preserving the order of `filenames`.
files = [
    File(name, location, lines)
    for name, location, lines in map(read_file, filenames)
]

# Print each parsed file.
for parsed in files:
    print(parsed)

# file = read_file('sample-es6/src/js/html.js')
# file = read_file('test2.js')
# new_file = File(file[0], file[1], file[2])
# print(new_file)
Example #34
0
 def setUp(self):
     """Create the File client under test and seed it with fixture data.

     The client is built from an empty config string, then given a small
     HTML document as its previous HTML and "html > body > h1" as its
     previous diff.
     """
     config = ""
     previous_html = "<html><body><h1>TEST</h1></body></html>"
     previous_diff = "html > body > h1"

     self.client = File(config)
     self.client.insert_previous_html(previous_html)
     self.client.insert_previous_diff(previous_diff)