Example #1
import os

from numpy import nan
import pandas as pd


def gen_data(fn,
             rows_keep=10,
             init_cols=2,
             last_data_cols=5,
             saveas='scratch.csv',
             verbose=False):
    """Allows you to specify which rows/cols of a csv file you want to use for
          a test file.  (First couple of cols, and ones at end; a few rows typically).
    	  saves it as a csv and prints it to console.  Fast way to build up testing
    	  assets."""
    vendor = 'ACSI'
    path = os.path.join("..", "..", "current_wave_data")
    full_path = os.path.join(os.path.abspath(path), vendor, fn)
    csv = pd.read_csv(full_path)
    csv.replace(nan_value, nan, inplace=True)  # nan_value is assumed to be defined at module level

    # list() around range() so the two selections concatenate under Python 3.
    good_cols = list(range(init_cols)) + list(range(-last_data_cols, 0))
    keep = csv.iloc[:rows_keep, good_cols]

    save_path = os.path.join(os.path.abspath(path), vendor, saveas)

    if verbose:
        print(save_path)
        print("(from {})".format(full_path))
        print()
        print(repr(keep))
        print()

    #keep.to_csv(save_path, na_rep = nan, index = False)

    return keep
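
A quick usage sketch (the file name is hypothetical, and the function assumes a ../../current_wave_data/ACSI/ directory exists):

# Hypothetical call: carve a 5-row fixture out of a vendor csv and inspect it.
sample = gen_data('wave_results.csv', rows_keep=5, verbose=True)
print(sample.shape)  # at most (5, init_cols + last_data_cols) == (5, 7)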
Example #2
import os

import nightmare  # project-local module providing NightmareTable


def addToInstaller(csvList, installername):
    """Takes a list of csv files and adds them to the EA installer."""
    with open(installername, "w") as myfile:
        myfile.write("//EA Table Installation file generated by c2ea.exe\n\n")
        myfile.write('#include "Table Definitions.txt"\n\n')

        for csv in csvList:
            nmpath = csv.replace(".csv", ".nmm")
            nmm = nightmare.NightmareTable(nmpath)
            # Use .event rather than .txt: .txt conflicts, but this is still a text file.
            filename = csv.replace(".csv", ".event")
            filename = os.path.relpath(filename, os.path.dirname(installername))
            # Don't write "ORG " + hex(nmm.offset) here; the offset lives in the dmp.
            myfile.write('#include "' + filename + '"\n\n')
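
For reference, a sketch of the file this writes, assuming a hypothetical csvList of ['ClassData.csv']:

# addToInstaller(['ClassData.csv'], 'Installer.event') produces roughly:
#
#   //EA Table Installation file generated by c2ea.exe
#
#   #include "Table Definitions.txt"
#
#   #include "ClassData.event"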
Example #3
    def return_old_new(self, csv=None, pkl=None):
        """
        :param csv: List of strings representing csv files to filter by version.
        :param pkl: List of strings representing pkl files to filter by version.
        :return: Tuple of (csv, pkl), each a list of strings whose elements
                correspond to old/new files. Each element includes the file path.

        Note: the 'version' argument described here previously ('old' or 'new',
        selecting filenames whose 3rd underscore-delimited slot matches) is not
        part of the current signature.
        """
        csv_files = csv
        pkl_files = pkl

        if csv is None or pkl is None:
            csv_files = self.analyze_csv
            pkl_files = self.analyze_pickle

        # TODO: Currently, each csv/pkl file in function arguments must contain self.csv_path or self.pickle_path
        #  respectively for this function to work. This is because we first clean up the strings by removing the
        #  file path prefix. This needs to be changed so that we split on some character that delineates the end
        #  of the path prefix and the start of the actual file name.

        csv_data = [csv.replace(self.prediction_path, '') for csv in csv_files]
        pkl_data = [pkl.replace(self.annotation_path, '') for pkl in pkl_files]

        old_csv = [self.prediction_path + csv for csv in csv_data]
        old_pkl = [self.annotation_path + pkl for pkl in pkl_data]
        return old_csv, old_pkl
Example #4
import os

import pandas as pd


def read_matrices(path, index_col=None, skip=None):
    res = {}
    csvs = os.listdir(path)
    for csv in csvs:
        # Match the extension proper rather than the substring '.csv'.
        if csv.endswith('.csv'):
            if not skip or skip not in csv:
                res[csv.replace('.csv', '')] = pd.read_csv(os.path.join(path, csv))
    return res
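
A minimal usage sketch; the directory name is hypothetical:

matrices = read_matrices('data/matrices/', skip='raw')
print(sorted(matrices))  # keys are file names minus '.csv', e.g. ['counts', 'weights']
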
Example #5

import numpy as np
import pandas as pd


def importPreprocessData(filename="preprocessedDataBalAGSampleclean.csv"):
    '''
    Imports the file preprocessedDataBalAG (or its clean version)
    and puts it into two arrays X and Y for the learning process.
    -- IN:
    filename : the name of the file to import (string) default = preprocessedDataBalAG.csv
    -- OUT:
    X : np.array containing the features (np.array)
    Y : np.array containing the observations (np.array)
    '''
    print "== Importing data"
    usecols = ['montantPieceEur','logMontant','echeance','year','age','scoreSolv','scoreZ','scoreCH','Y']
    usecolsX = ['logMontant','echeance','age','scoreSolv','scoreZ','scoreCH']
#     usecolsX = ['logMontant']
#     usecolsX = ['echeance']
#     usecolsX = ['logMontant','echeance','age','scoreSolv','scoreZ','scoreCH']
    usecolsY = ['Y']
    dtype = {
        'montantPieceEur': np.float64,
        'echeance': np.int16,
        'year': np.int16,
        'age': np.int16,
        'logMontant': np.float16,
        'scoreSolv': np.float16,
        'scoreZ': np.float16,
        'scoreCH': np.float16,
        'scoreAltman': np.float16,
        'Y': bool,  # np.bool is removed in modern NumPy; use the builtin
    }
    csv = pd.read_csv(filename, sep="\t", usecols=usecols, dtype=dtype)
    plus = 1
    moins = 1
    # Map the boolean label to +moins / -plus, i.e. +1 / -1.
    fY = lambda x: moins if x else -plus
    csv['Y'] = csv['Y'].apply(fY)
    csv.replace([np.inf, -np.inf], np.nan, inplace=True)
    csv.dropna(axis=0, how='any', inplace=True)
    print("   ...done")
    return (csv[usecolsX], csv[usecolsY])
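
A sketch of how the returned frames might feed a model; the classifier choice is an assumption, not part of the original:

from sklearn.linear_model import LogisticRegression

X, Y = importPreprocessData()
# Y is a one-column frame of +1/-1 labels; ravel() flattens it for sklearn.
clf = LogisticRegression().fit(X, Y.values.ravel())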
Example #6
from datetime import datetime

from flask import make_response, request


def download_csv():
    titulo = request.form['producto']
    csv = request.form['lista']
    # Flatten the stringified list of lists into CSV rows.
    lista = csv.replace("], [", "\n")
    lista = lista.replace("'", "")
    lista = lista.replace("[", "")
    lista = lista.replace("]", "")
    response = make_response(titulo + '\n' + lista)

    ahora = datetime.now()
    # Zero-pad the timestamp so the generated filenames sort correctly.
    nombre_archivo = 'attachment; filename=resultados_{:%Y%m%d_%H%M%S}.csv'.format(ahora)
    response.headers['Content-Disposition'] = nombre_archivo
    response.mimetype = 'text/csv'

    return response
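
The snippet omits the route registration; a minimal sketch of how the view might be wired up (the URL and app name are assumptions):

from flask import Flask

app = Flask(__name__)
# Hypothetical registration; the form must POST 'producto' and 'lista' fields.
app.add_url_rule('/download_csv', view_func=download_csv, methods=['POST'])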
Example #7
    def test_fallback(self):
        csv = """
nvs,      data, nvs,     0x9000,  0x4000
otadata,  data, ota,     0xd000,  0x2000
phy_init, data, phy,     0xf000,  0x1000
ota_0,  app,    ota_0,   0x30000,  1M
ota_1,  app,    ota_1,          ,  1M
        """
        def rpt(args):
            return self._run_parttool(csv, args)

        self.assertEqual(rpt("--type app --subtype ota_1 --offset"),
                         "0x130000")
        self.assertEqual(rpt("--default-boot-partition --offset"),
                         "0x30000")  # ota_0
        csv_mod = csv.replace("ota_0", "ota_2")
        self.assertEqual(
            self._run_parttool(csv_mod, "--default-boot-partition --offset"),
            "0x130000")  # now default is ota_1
Example #8
def build_load_command(csvs):
    """
    Creates a bash script for data loading, intended to be executed from a location
    directly above neo4j import directory (datadir)
    """

    cmd = """#!/bin/bash
set -euo pipefail
IFS=$'\\n\\t'
neo4j-admin import --id-type STRING \\\n"""
    for csv in csvs:
        entity_name = csv.replace('.csv', '').capitalize()
        if entity_name == 'Author':
            cmd += f'--relationships:authored "./import/{csv}.header,./import/{csv}" \\\n'
        elif entity_name == 'Editor':
            cmd += f'--relationships:edited "./import/{csv}.header,./import/{csv}" \\\n'
        else:
            cmd += f'--nodes:{entity_name} "./import/{csv}.header,./import/{csv}" \\\n'
    return cmd
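
For reference, a sketch of the script this emits for a hypothetical csvs=['article.csv', 'author.csv']:

# print(build_load_command(['article.csv', 'author.csv'])) yields roughly:
#
#   #!/bin/bash
#   set -euo pipefail
#   IFS=$'\n\t'
#   neo4j-admin import --id-type STRING \
#   --nodes:Article "./import/article.csv.header,./import/article.csv" \
#   --relationships:authored "./import/author.csv.header,./import/author.csv" \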
Example #9

    def test_fallback(self):
        csv = """
nvs,      data, nvs,     0x9000,  0x4000
otadata,  data, ota,     0xd000,  0x2000
phy_init, data, phy,     0xf000,  0x1000
ota_0,  app,    ota_0,   0x30000,  1M
ota_1,  app,    ota_1,          ,  1M
        """

        def rpt(args):
            return self._run_parttool(csv, args)

        self.assertEqual(
            rpt(["--partition-type", "app", "--partition-subtype", "ota_1", "--info", "offset"]), b"0x130000")
        self.assertEqual(
            rpt(["--partition-boot-default", "--info", "offset"]), b"0x30000")  # ota_0
        csv_mod = csv.replace("ota_0", "ota_2")
        self.assertEqual(
            self._run_parttool(csv_mod, ["--partition-boot-default", "--info", "offset"]),
            b"0x130000")  # now default is ota_1
Example #10
    def test_fallback(self):
        csv = """
nvs,      data, nvs,     0x9000,  0x4000
otadata,  data, ota,     0xd000,  0x2000
phy_init, data, phy,     0xf000,  0x1000
ota_0,  app,    ota_0,   0x30000,  1M
ota_1,  app,    ota_1,          ,  1M
        """

        def rpt(args, info):
            return self._run_parttool(csv, args, info)

        self.assertEqual(
            rpt("--partition-type=app --partition-subtype=ota_1 -q", "offset"), b"0x130000")
        self.assertEqual(
            rpt("--partition-boot-default -q", "offset"), b"0x30000")  # ota_0
        csv_mod = csv.replace("ota_0", "ota_2")
        self.assertEqual(
            self._run_parttool(csv_mod, "--partition-boot-default -q", "offset"),
            b"0x130000")  # now default is ota_1
Example #11
    def check_load_csv(self):
        # TODO: First iterate through pkl test files. If there is a matching csv file,
        #  append both to the respective list. This guarantees that the element at each
        #  index in one list corresponds to the element at the same index in the other.
        set_of_csv = set()
        set_of_pkl = set()
        for csv in glob.glob(self.prediction_path + '*.csv'):
            # Clean the csv name down to just the file name:
            #   csv_not_done/abc.csv --> abc
            file_name = csv.replace(self.prediction_path,
                                    '').replace('.csv', '')
            set_of_csv.add(file_name)
        # glob.glob() returns an empty list instead of raising, so test for
        # emptiness rather than wrapping the loop in a bare try/except.
        if not set_of_csv:
            print('No CSV file in directory. Transfer some and run again')
        # Suffix for rebuilding pickle name.
        pickle_suffix = ''
        for picklefile in glob.glob(self.annotation_path + '*.h5'):
            file_name = picklefile.replace(self.annotation_path,
                                           '').replace('.mp4_annoated.h5', '')

            # Save the suffix past the last '_' to rebuild the pickle name later.
            pickle_suffix = file_name[file_name.rfind('_'):]
            # Find the last occurrence of '_', and take the string up to that.
            #file_name = file_name[:file_name.rfind('_')]
            # test/abc_test.p --> abc
            set_of_pkl.add(file_name)
        if not set_of_pkl:
            print('No Pickle file in directory. Transfer some and run again')
        common_files = list(set_of_pkl.intersection(set_of_csv))
        # Rebuild the lists of csvs and pickles from this intersection.
        for file in common_files:
            csv_name_rebuilt = self.prediction_path + file
            self.analyze_csv.append(csv_name_rebuilt)
            pickle_name_rebuilt = self.annotation_path + file + '.mp4_annoated.h5'
            print(pickle_name_rebuilt)
            self.analyze_pickle.append(pickle_name_rebuilt)
Example #12

import sqlite3

import pandas


def create_db_from_csv(year=2018):
    csv_dir = "data/{}/player/".format(year)

    con = sqlite3.connect("{}_player.db".format(year))

    player_names = []
    csv_names = find_csv_filenames(csv_dir)  # helper defined elsewhere in the project
    for csv in csv_names:
        table = pandas.read_csv("{}/{}".format(csv_dir, csv))
        table = table.fillna(0)
        player_name = csv.replace(" ", "_").replace(".csv", "")
        table.to_sql(player_name, con=con, index=False)
        player_names.append(player_name)

    player_index_table = "_PLAYER_INDEX"
    con.cursor().execute("CREATE TABLE {} (id integer PRIMARY KEY, name text NOT NULL);".format(player_index_table))
    # Parameterized values avoid breakage (and injection) from quotes in names.
    for id, name in enumerate(player_names):
        con.cursor().execute("INSERT INTO {} (id, name) VALUES (?, ?);".format(player_index_table), (id, name))

    con.commit()
    con.close()
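
A hypothetical read-back of the index table the function builds:

import sqlite3

con = sqlite3.connect("2018_player.db")
for row in con.execute("SELECT id, name FROM _PLAYER_INDEX ORDER BY id;"):
    print(row)  # e.g. (0, 'Some_Player'); names depend on the csv files present
con.close()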
Example #13
    def test_fallback(self):
        csv = """
nvs,      data, nvs,     0x9000,  0x4000
otadata,  data, ota,     0xd000,  0x2000
phy_init, data, phy,     0xf000,  0x1000
ota_0,  app,    ota_0,   0x30000,  1M
ota_1,  app,    ota_1,          ,  1M
        """

        def rpt(args):
            return self._run_parttool(csv, args)

        self.assertEqual(
            rpt([
                '--partition-type', 'app', '--partition-subtype', 'ota_1',
                '--info', 'offset'
            ]), b'0x130000')
        self.assertEqual(rpt(['--partition-boot-default', '--info', 'offset']),
                         b'0x30000')  # ota_0
        csv_mod = csv.replace('ota_0', 'ota_2')
        self.assertEqual(
            self._run_parttool(
                csv_mod, ['--partition-boot-default', '--info', 'offset']),
            b'0x130000')  # now default is ota_1
Example #14
import json
import os

with open('./data/tool_categories.json', 'r') as f:
    tool_categories = json.load(f)

to_path = '/home/ylk1996/Research/FinalCSCW_hopefully/data/comments/comments/'

existing_comments = os.listdir(to_path)

#print(len(project_name_list))
#print(len(existing_comments))

# Drop projects whose comments already exist on disk.
# (project_name_list is defined earlier in the full script.)
for csv in existing_comments:
    csv = csv.replace('__', '/')
    csv = csv.replace('.csv', '')
    if csv in project_name_list:
        project_name_list.remove(csv)

print(len(project_name_list))

#--------------------End setting the parameters----------------------
############################################################################################################################################


def contain_non_English(check_str):
    '''
Example #15
    def genImages(self, gen_ts):
        """Generate the images.

        The time scales will be chosen to include the given timestamp, with
        nice beginning and ending times.

        gen_ts: The time around which plots are to be generated. This will
        also be used as the bottom label in the plots.
        """

        t1 = time.time()
        ngen = ZERO

        for species_name in self.cydia_dict.sections:
            # Get the path that the image is going to be saved to:
            plot_options = weeutil.config.accumulateLeaves(
                self.image_dict['year_images'])
            species_options = weeutil.config.accumulateLeaves(
                self.cydia_dict[species_name])
            plot_options.update(species_options)

            date_string = plot_options.get('end_date')
            if date_string:
                plotgen_ts = time.mktime(time.strptime(date_string,
                                                       '%m/%d/%Y'))
            else:
                plotgen_ts = gen_ts
                if not plotgen_ts:
                    plotgen_ts = time.time()

            date_string = plot_options.get('start_date')
            if date_string:
                start_date_ts = time.mktime(
                    time.strptime(date_string, '%m/%d/%Y'))
            else:
                now_tuple = time.localtime(plotgen_ts)
                new_year_tuple = [
                    now_tuple.tm_year, 1, 1, ZERO, ZERO, ZERO, ZERO, ZERO,
                    now_tuple.tm_isdst
                ]
                start_date_ts = time.mktime(tuple(new_year_tuple))

            image_root = os.path.join(self.config_dict['WEEWX_ROOT'],
                                      plot_options['HTML_ROOT'])
            img_file = os.path.join(image_root, '%s.png' % species_name)

            ai = 86400  # 86400 = 24 h; test with 600

            # Calculate a suitable min, max time for the requested time.
            (minstamp, maxstamp,
             timeinc) = weeplot.utilities.scaletime(start_date_ts, plotgen_ts)

            # Now it's time to find and hit the database:
            text_root = os.path.join(self.config_dict['WEEWX_ROOT'],
                                     plot_options['HTML_ROOT'])
            tmpl = self.skin_dict.get('CheetahGenerator', {}).get(
                'CydiaDDData', {}).get('template', 'Cydia/GREEN-YYYY.csv.tmpl')
            (csv, ext) = os.path.splitext(tmpl)
            csv_name = csv.replace('YYYY',
                                   str(time.localtime(plotgen_ts).tm_year))
            csv_file_name = os.path.join(text_root, '%s' % csv_name)
            spec_threshold_lo = plot_options.get(
                'threshold_lo', [10, 'degree_C'])  # [50, 'degree_F']
            threshold_lo_t = get_float_t(spec_threshold_lo,
                                         'group_temperature')
            spec_threshold_hi = plot_options.get(
                'threshold_hi', [31.1, 'degree_C'])  # [88, 'degree_F']
            threshold_hi_t = get_float_t(spec_threshold_hi,
                                         'group_temperature')
            recs = self.get_vectors((minstamp, maxstamp), csv_file_name,
                                    threshold_lo_t, threshold_hi_t)

            # Do any necessary unit conversions:
            self.vectors = {}
            for (key, val) in list(recs.items()):
                self.vectors[key] = self.converter.convert(val)

            t1_ts = time.time()
            if not skipThisPlot(t1_ts, ai, img_file):
                # Create the subdirectory that the image is to be put in.
                # Wrap in a try block in case it already exists.
                try:
                    os.makedirs(os.path.dirname(img_file))
                except OSError:
                    pass

                self.plot = self.plot_image(
                    species_name,
                    plot_options,
                    plotgen_ts,
                    (minstamp, maxstamp, timeinc),
                    self.vectors,
                )
                # OK, the plot is ready. Render it onto an image
                image = self.plot.render()

                try:
                    # Now save the image
                    image.save(img_file)
                    ngen += 1
                except IOError as e:
                    log.info("cydiagenerator: Unable to save to file '%s' %s:",
                             img_file, e)
                t2 = time.time()
                if self.log_success:
                    log.info("Generated %d images for %s in %.2f seconds",
                             ngen, self.skin_dict['REPORT_NAME'], t2 - t1)

        return self
Example #16
def main(csv, inDir, batchID): #* batchID to keep track of groups of pairs for processing

##    def run_asp(
##    csv,
##    outDir,     ##  ='/att/gpfsfs/userfs02/ppl/pmontesa/outASP',         #'/att/gpfsfs/userfs02/ppl/cneigh/nga_veg/outASP',
##    inDir,
##    nodesList,
##    mapprj,
##    mapprjRes,
##    par,
##    strip=True,
##    searchExtList=['.ntf','.tif','.NTF','.TIF'],        ## All possible extentions for input imagery ['.NTF','.TIF','.ntf','.tif']
##    csvSplit=False,
##    doP2D=True,
##    stereoDef='/att/gpfsfs/home/pmontesa/code/stereo.default',
##    DEMdir='/att/nobackup/cneigh/nga_veg/in_DEM/aster_gdem',
##    #mapprjDEM='/att/nobackup/cneigh/nga_veg/in_DEM/aster_gdem2_siberia_N60N76.tif',     ## for testing
##    prj='EPSG:32647',                                                                   ## default is for Siberia
##    test=False,                                                                         ## for testing
##    rp=100):

    DEMdir = '/att/pubrepo/ASTERGDEM/'
    DISCdir = '/discover/nobackup/projects/boreal_nga' # DISCOVER path, for writing the job scripts
    batchDir = os.path.join(inDir, 'batch%s' % batchID)
    os.system('mkdir -p %s' % batchDir)

    LogHeaderText = []


    LogHeaderText.append("Input csv file: %s" % csv)
    LogHeaderText.append("BatchID: %s" % batchID)


    # [1] Read csv of stereo shapefile footprints
    # This shapefile is provided by PGC or DG, so the col names are specific to each attribute table.
    # We also have our own footprint code. To run this script on a csv from a SHP produced by that code,
    # we need to make sure we have coded for the same col names OR change the col names specified in [2]
    csvStereo = open(csv, 'r')

    # Get the header
    header = csvStereo.readline().lower().rstrip().split(',')  # moved the split to this stage to prevent redundant processing - SSM


    # [2] From the header, get the indices of the attributes you need
    catID_1_idx     = header.index('catalogid')
    catID_2_idx     = header.index('stereopair')
    sensor_idx      = header.index('platform')
    avSunElev_idx   = header.index('avsunelev')
    avSunAzim_idx   = header.index('avsunazim')
    imageDate_idx   = header.index('acqdate')
    avOffNadir_idx  = header.index('avoffnadir')
    avTargetAz_idx  = header.index('avtargetaz')

    # Save all csv lines; close file
    csvLines = csvStereo.readlines()
    csvStereo.close()

    # Used for output CSV and VALPIX shapefile
    outHeader = "pairname, catID_1, catID_2, mapprj, year, month, avsunelev, avsunaz, avoffnad, avtaraz, avsataz, conv_ang, bie_ang, asym_ang, DSM\n"
    outHeaderList = outHeader.rstrip().split(',')


    ##* Everything up until now has stayed (pretty much) the same.
    ##* Here I am removing the rest of the runASP code outside of the summary-csv
    ##* "with open" block, and will simply store the out attributes in a table,
    ##* then write them to the outCsv at the end.
    # Set up an output summary CSV that matches the input CSV
    # csvOutFile = csv.split(".")[0] + "_output_smry.csv" ##* old way; below is the same thing but more readable
    csvOutFile = csv.replace('.csv', '_output_smry.csv')
    print('')

    #csvOutFile = [] # this will store the out attributes so we can write to the summary csv
    with open(csvOutFile, 'w') as c:
        c.write(outHeader)


    # create submission script file which will contain all commands needed to submit the job to slurm
    submission_file = os.path.join(batchDir, 'submit_jobs_batch%s.sh' % batchID)
    # ?? what all do we need here to run all the jobs ??
    with open(submission_file, 'w') as ff:
        ff.write('Enter parameters needed here\nAnd here\nEtc.\n\n')


    #------------------------------------------------------------------
    #       CSV Loop --> runs parallel_stereo for each line of CSV across all VMs
    #------------------------------------------------------------------
    # [3] Loop through the lines (the records in the csv table), get the attributes and run the ASP commands
    n_lines = len(csvLines)  # number of pairs we are attempting to process
    pair_count = 0  # to print which pair we are at
    n_pair_copy = 0  # number of successfully copied pairs
    for line in csvLines[0:2]:  # AKA for pair, or record, in the input table # TEST just 2 lines for now
        pair_count += 1
        print("Attempting to copy data for pair %d of %d" % (pair_count, n_lines))

        preLogText = []


        # Get attributes from the CSV
        linesplit = line.rstrip().split(',')
        preLogText.append("Current line from CSV file:")
        preLogText.append(line)
        preLogText.append(linesplit)

        catID_1    = linesplit[catID_1_idx]
        catID_2    = linesplit[catID_2_idx]
        sensor     = str(linesplit[sensor_idx])
        imageDate  = linesplit[imageDate_idx]
        avSunElev  = round(float(linesplit[avSunElev_idx]),0)
        avSunAz    = round(float(linesplit[avSunAzim_idx]),0)
        avOffNadir = round(float(linesplit[avOffNadir_idx]),0)
        avTargetAz = round(float(linesplit[avTargetAz_idx]),0)
        if avTargetAz <= 180:
            avSatAz = avTargetAz + 180
        else:
            avSatAz = avTargetAz - 180

        # Initialize DEM string
        mapprjDEM = ''

        # Get Image Date ##** can probably simplify this --- need to check with Paul to see which is the correct date format
        if imageDate != '':
            try:
                imageDate = datetime.strptime(imageDate, "%m/%d/%Y")
                preLogText.append('\tTry 1: ' + str(imageDate))
            except ValueError:
                try:
                    imageDate = datetime.strptime(imageDate, "%Y-%m-%d")
                    preLogText.append('\tTry 2: ' + str(imageDate))
                except ValueError:
                    pass
Example #17

import os
import time


def cvt_csv_2_midi(filepath):
    time.sleep(4)
    # list(os.walk(filepath))[0][2] is the list of file names in the top directory.
    csv_list = list(os.walk(filepath))[0][2]
    for csv in csv_list:
        midi = csv.replace('.csv', '.mid')
        os.system("tool\\Csvmidi.exe .\\result\\" + csv + " .\\result\\" + midi)
Example #18
def no_repeats(csv, sci):
    #print(type(csv))
    #print('\n')

    #print('csv',10*" ",csv)
    #print('sci',10*" ",sci)
    csv=csv.replace('""', '')
    csv=csv.replace('  ', ' ')
    sci=sci.replace('""', '')
    sci=sci.replace('  ', ' ')
    new_csv=[]
    new_sci=[]
    final_draft=[]
    if not csv:
        #print ('final 1',sci)
        return sci
    if not sci:
        #print ('final 2',sci)
        return csv
    if sci in csv:
        #print('passed')
        return False
    if (sci + '.') in csv:
        #print('passed')
        return False

    #FIXME: I haven't been able to separate properly when there are abbreviations.
    temp_csv=list(csv.split('. '))

    #print('csv 1',10*" ",temp_csv)
    #for ele in temp_csv:
        #if '.' in ele:
            #new_csv.append(ele.replace('.',''))
        #else:
            #new_csv.append(ele)

    temp_sci=list(sci.split('. '))

    #for ele in temp_sci:
        #if '.' in ele:
            #new_sci.append(ele.replace('.',''))
        #else:
            #new_sci.append(ele)
    temp2_csv=[]
    for ele in temp_csv:
        temp2_csv.append(ele.lower())

    for ele1 in temp_sci:
        ele=ele1.lower()
        if ele not in temp2_csv:
            final_draft.append(ele1)

    final_draft=final_draft+temp_csv
    final_draft=str(final_draft).strip('[]')
    final_draft=final_draft.replace(',',', ')
    final_draft=final_draft.replace(',  ',', ')
    final_draft=final_draft.replace(r'\\','')#need two \\ to make it valid to read one
    final_draft=final_draft.replace(r'\\"', '')
    final_draft=final_draft.replace(r'\\\\', '')
    final_draft=final_draft.replace('.,','.')

    #comma.join(final_draft)
    #print(new_sci)
    final_draft=final_draft.replace('S, J., Costa, V., Psaroulis, V., Arzoglou, L,','S. J., Costa, V., Psaroulis, V., Arzoglou, L.')
    #print('final',10*" ",final_draft)

    return final_draft
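
A small demonstration with invented reference strings, tracing the merge:

csv_refs = 'Smith, J. Brown, A'
sci_refs = 'Brown, A. Jones, K'
# 'Brown, A' is already in csv_refs, so only 'Jones, K' survives from sci_refs:
print(no_repeats(csv_refs, sci_refs))  # "'Jones, K', 'Smith, J', 'Brown, A'"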
Example #19
def no_repeats(csv, sci):
    #print(type(csv))
    #print('\n')

    #print('csv',10*" ",csv)
    #print('sci',10*" ",sci)
    csv = csv.replace('""', '')
    csv = csv.replace('  ', ' ')
    sci = sci.replace('""', '')
    sci = sci.replace('  ', ' ')
    new_csv = []
    new_sci = []
    final_draft = []
    if not csv:
        #print ('final 1',sci)
        return sci
    if not sci:
        #print ('final 2',sci)
        return csv
    if sci in csv:
        #print('passed')
        return False
    if (sci + '.') in csv:
        #print('passed')
        return False

    #FIXME: I haven't been able to separate properly when there are abbreviations.
    temp_csv = list(csv.split('. '))

    #print('csv 1',10*" ",temp_csv)
    #for ele in temp_csv:
    #if '.' in ele:
    #new_csv.append(ele.replace('.',''))
    #else:
    #new_csv.append(ele)

    temp_sci = list(sci.split('. '))

    #for ele in temp_sci:
    #if '.' in ele:
    #new_sci.append(ele.replace('.',''))
    #else:
    #new_sci.append(ele)
    temp2_csv = []
    for ele in temp_csv:
        temp2_csv.append(ele.lower())

    for ele1 in temp_sci:
        ele = ele1.lower()
        if ele not in temp2_csv:
            final_draft.append(ele1)

    final_draft = final_draft + temp_csv
    final_draft = str(final_draft).strip('[]')
    final_draft = final_draft.replace(',', ', ')
    final_draft = final_draft.replace(',  ', ', ')
    final_draft = final_draft.replace(
        r'\\', '')  #need two \\ to make it valid to read one
    final_draft = final_draft.replace(r'\\"', '')
    final_draft = final_draft.replace(r'\\\\', '')
    final_draft = final_draft.replace('.,', '.')

    #comma.join(final_draft)
    #print(new_sci)
    final_draft = final_draft.replace(
        'S, J., Costa, V., Psaroulis, V., Arzoglou, L,',
        'S. J., Costa, V., Psaroulis, V., Arzoglou, L.')
    #print('final',10*" ",final_draft)

    return final_draft