def J2000_to_decimal_year(J2000):
    """
    Convert J2000 values to decimal year.

    Note decimal years use the midnight epoch of the year in question, while J2000
    uses noon at 20000101.

    Args:
        J2000: a singleton J2000 value, or a sized collection of J2000 values

    Returns:
        A single decimal year for singleton input, otherwise a Numpy array of decimal
        years (an empty array for empty input).

    >>> J2000_to_decimal_year(139755661.1234)
    2004.430253079763

    Array case, used list comprehension to force Python to print out full precision,
    Numpy truncates to screen (values OK)

    >>> [x for x in J2000_to_decimal_year([139755661.1234,139755661.1235,139755661.1236])]
    [2004.4302530797629, 2004.4302530797661, 2004.430253079769]

    >>> J2000_to_decimal_year([])
    array([], dtype=float64)
    """
    scalar_input = is_not_iterable(J2000)
    if scalar_input:
        J2000 = (J2000, )

    if len(J2000) < 1:
        return N.array([])

    def _as_decimal_year(seconds):
        # The first four characters of the formatted date are the calendar year
        year = from_J2000(seconds, 'YYYYMMDD')[0:4]
        # Offset from midnight Jan 1st of that year, expressed as a fraction of a year
        year_start = to_J2000(year + "0101000000")
        return (seconds - year_start) / SECONDS_IN_YEAR + int(year)

    decimal_years = [_as_decimal_year(seconds) for seconds in J2000]

    return decimal_years[0] if scalar_input else N.array(decimal_years)
def glob(globspec):
    """
    Attempts to execute a glob, retrying for as long as the underlying glob raises IndexError.

    In certain situations, glob can return "IndexError: string index out of range."
    Handle this for the user and keep trying until it stops that pathological behavior.

    Args:
        globspec: a singleton glob pattern or an iterable of glob patterns

    Returns:
        The flattened collection of every path matched by any of the patterns.

    Try on valid case

    >>> glob(os.path.realpath(__file__)) # doctest: +ELLIPSIS
    ['.../mlib/shell.py']

    Try on invalid path

    >>> glob("/neverexists/*")
    []

    Try on multiple globs

    >>> from mlib._doctest import repo_path
    >>> sorted(glob([repo_path() + '/mlib/s*.py', repo_path() + '/mlib/co*.py'])) # doctest: +ELLIPSIS
    ['.../mlib/color.py', '.../mlib/correlation.py', '.../mlib/shell.py', '.../mlib/spanning.py']
    """
    patterns = [globspec, ] if is_not_iterable(globspec) else globspec

    while True:
        try:
            return flatten([list(G.glob(pattern)) for pattern in patterns])
        except IndexError:
            # Nothing above should ever provide an index error unless the disk is being
            # bizarre, so simply try the whole expansion again
            continue
def decimal_year_to_J2000(decimal_year):
    """
    Convert decimal year to J2000. 12 hour difference in epoch.

    Args:
        decimal_year: a singleton decimal year, or a sized collection of decimal years

    Returns:
        A single J2000 value for singleton input, otherwise a Numpy array of J2000 values
        (an empty array for empty input).

    >>> decimal_year_to_J2000(J2000_to_decimal_year(139755661.1234))
    139755661.123403

    Test array case, used a list comprehension to force Python to print the entire
    numerical value (Numpy truncates)

    >>> [x for x in decimal_year_to_J2000(J2000_to_decimal_year([139755661.1234,139755661.1234]))]
    [139755661.12340301, 139755661.12340301]

    >>> decimal_year_to_J2000(J2000_to_decimal_year([]))
    array([], dtype=float64)
    """
    scalar_input = is_not_iterable(decimal_year)
    if scalar_input:
        decimal_year = (decimal_year, )

    if len(decimal_year) < 1:
        return N.array([])

    converted = []
    for value in decimal_year:
        whole_year = int(value)
        # J2000 value of midnight, January 1st of the whole year
        year_start = to_J2000(("%04d" % whole_year) + "0101000000")
        # The fractional part of the year is scaled back into J2000 units
        converted.append(year_start + (value - whole_year) * SECONDS_IN_YEAR)

    if scalar_input:
        return converted[0]
    return N.array(converted)
def rm(targets):
    """
    Remove files, symbolic links and directories without raising if they are missing.

    Copies a lot of os functionality so we can just have a unified interface (no exception
    thrown if missing). Also removes directories and symbolic links. Can handle globs.
    Be careful! Will fail on read only files.

    Every target is expanded with glob(); each resulting path is first unlinked as a file
    or link and, if that fails, removed as a directory tree. All IOError/OSError failures
    are ignored, so this function never reports that a removal did not happen.

    Args:
        targets: a singleton path or list of paths

    >>> from mlib._doctest import repo_path
    >>> testfile = repo_path()+"/doctest_working/testfile.txt"
    >>> with open(testfile,"w") as f: f.write("yikes")
    5
    >>> exists(testfile)
    True
    >>> rm(testfile)
    >>> exists(testfile)
    False

    >>> mkdir(testfile)
    >>> exists(testfile)
    True
    >>> rm(testfile)
    >>> exists(testfile)
    False

    >>> mkdir(testfile)
    >>> with open(testfile+"/blah.txt","w") as f: f.write("yikes")
    5
    >>> exists(testfile)
    True
    >>> rm(testfile)
    >>> exists(testfile)
    False
    """
    if is_not_iterable(targets):
        targets = [targets, ]

    for target in targets:
        for path in glob(target):
            # Plain files and symbolic links go away with a simple unlink
            try:
                os.unlink(path)
                continue
            except (IOError, OSError):
                pass

            # Unlink failed (e.g. the path is a directory): remove the whole tree instead
            try:
                shutil.rmtree(path, ignore_errors=True)
            except (IOError, OSError):
                pass
def cp(oldpaths, newpath):
    """
    Copy one or more files (globs accepted) to newpath.

    Can accept an array of oldpaths to copy to the new (presumably dir). Won't get merged,
    just overwritten if it's a file by accident.

    Any IOError, OSError or shutil.Error raised by an individual copy is silently ignored.

    Args:
        oldpaths: a singleton path/glob or an iterable of paths/globs to copy
        newpath : destination handed to shutil.copy for every matched file
    """
    sources = [oldpaths, ] if is_not_iterable(oldpaths) else oldpaths

    for source_spec in sources:
        for matched_path in glob(source_spec):
            try:
                shutil.copy(matched_path, newpath)
            except (IOError, OSError, shutil.Error):
                # Best effort: a failed copy must not stop the remaining ones
                pass
def mv(oldpaths, newpath):
    """
    Move or rename files.

    Can accept array of oldpaths, presumably for a directory destination. Won't get merged
    if not dir. Also accepts globs. The original author warns: CANNOT REPLACE EXISTING FILES!

    Any IOError, OSError or shutil.Error raised by an individual move is silently ignored.

    Args:
        oldpaths: a singleton path/glob or an iterable of paths/globs to move
        newpath : destination handed to shutil.move for every matched path
    """
    if is_not_iterable(oldpaths):
        oldpaths = [oldpaths, ]

    # Expand each spec in turn; the next glob is only evaluated after the previous moves ran
    for spec in oldpaths:
        for source in glob(spec):
            try:
                shutil.move(source, newpath)
            except (IOError, OSError, shutil.Error):
                # Best effort: a failed move must not stop the remaining ones
                pass
def pad_to_length(strings, length=None, pad="_"):
    """
    Pad (on the right) or truncate string(s) so that every result has the same length.

    Args:
        strings: a singleton string or an iterable of strings
        length : target length. If None (default), the length of the longest string
                 provided is used (0 if no strings are provided)
        pad    : filler appended to strings shorter than length. A multi-character pad is
                 repeated and then cut so the result never exceeds length

    Returns:
        A list of strings, always a list even for singleton input.

    >>> pad_to_length(["a", "abc"])
    ['a__', 'abc']
    >>> pad_to_length("abcdef", length=3)
    ['abc']
    >>> pad_to_length("a", length=4, pad="xy")
    ['axyx']
    >>> pad_to_length([])
    []
    """
    if is_not_iterable(strings):
        strings = (strings, )

    # Materialize once so one-shot iterables survive both the length scan and the padding pass
    strings = list(strings)

    # We need to calculate the length if not provided; nothing to measure means nothing to pad
    if length is None:
        length = max(len(x) for x in strings) if strings else 0

    # A non-positive repeat count yields "" so long strings are simply truncated, and the
    # final slice keeps multi-character pads from overshooting the requested length
    return [(x + pad * (length - len(x)))[:length] for x in strings]
def J2000_to_sounding_ids(J2000, string=False):
    """
    Create a sounding_id from J2000 values.

    Args:
        J2000 : a singleton J2000 value, or a sized collection of J2000 values
        string: if True, return strings instead of N.int64's

    Returns:
        A single sounding id for singleton input, otherwise a Numpy array of sounding ids
        (an empty array of the matching dtype for empty input).

    N.int64 cases

    >>> J2000_to_sounding_ids(139755661.1234)
    2004060601010112

    >>> J2000_to_sounding_ids([139755661.1234,139755661.1234,139755661.1234])
    array([2004060601010112, 2004060601010112, 2004060601010112])

    >>> J2000_to_sounding_ids([])
    array([], dtype=int64)

    >>> J2000_to_sounding_ids(463429933.0)
    2014090806321300

    string cases

    >>> J2000_to_sounding_ids(139755661.1234,string=True)
    '2004060601010112'

    >>> J2000_to_sounding_ids([139755661.1234, 139755661.1234, 139755661.1234],string=True) #doctest: +NORMALIZE_WHITESPACE
    array(['2004060601010112', '2004060601010112', '2004060601010112'],
          dtype='<U16')

    >>> J2000_to_sounding_ids([], string=True) #doctest: +NORMALIZE_WHITESPACE
    array([], dtype='<U1')

    >>> J2000_to_sounding_ids(463429933.0, string=True)
    '2014090806321300'
    """
    scalar_input = is_not_iterable(J2000)
    if scalar_input:
        J2000 = (J2000, )

    # Empty input short-circuits to an empty array whose dtype matches the requested flavor
    if len(J2000) < 1:
        return N.array([], dtype=str if string else N.int64)

    # Strings are passed through untouched; otherwise the digits are cast to a 64 bit integer
    finalize = (lambda text: text) if string else N.int64
    sounding_ids = [
        finalize(from_J2000(moment, format="YYYYMMDDHHMMSSFF")) for moment in J2000
    ]

    return sounding_ids[0] if scalar_input else N.array(sounding_ids)
def enforce_literal(string_array):
    r"""
    Converts a string containing potentially functional characters for RegEx evaluation
    into a standard string using backslashes.

    Args:
        string_array: a list or array of strings, or a singleton string

    Returns:
        The escaped string for singleton string input, otherwise a list of escaped strings
        (a list is returned for list input of any length, including one).

    Should not modify this untroublesome string

    >>> print(enforce_literal("The Quick Brown Fox Jumped Over the L8zy dog!"))
    The Quick Brown Fox Jumped Over the L8zy dog!

    Parenthesis and brackets are especially troublesome (except for right bracket, interestingly)

    >>> print(enforce_literal("I want (this) dog! Bra[cket]"))
    I want \(this\) dog! Bra\[cket]

    Use all the special characters in a single go!

    >>> print(enforce_literal("\\ () [] ^ $ . | ? * {}"))
    \\ \(\) \[] \^ \$ \. \| \? \* \{}

    Array case

    >>> enforce_literal(["hi","th3re","(pattern?)"])
    ['hi', 'th3re', '\\(pattern\\?\\)']

    A one element list stays a list

    >>> enforce_literal(["(a)"])
    ['\\(a\\)']
    """
    was_string = isinstance(string_array, str)
    if was_string:
        string_array = [string_array, ]

    retvals = []
    for string in string_array:
        # '\\' must come first or else will match backslashes added by later chars
        # (assumes SPECIAL_CHARS lists the backslash first -- confirm at its definition)
        for char in SPECIAL_CHARS:
            string = string.replace(char, '\\' + char)
        retvals.append(string)

    # Unwrap only when the caller handed in a bare string; keying on the result length
    # would turn a one element list into a string
    if was_string:
        return retvals[0]
    return retvals
def map_values_to_rgba(values, palette=None, scale=False, uint8=False):
    """
    Maps values (range 0 to 1) to (r,g,b,a) tuples based on a palette.

    Args:
        values : normalized values from 0 to 1 that are to be mapped
        palette: a string representing the colormap to use. Any valid P.cm.* string is recognized,
                 as well as rainbow_index sets. May also receive a specified colormap object.
                 If None (default) uses currently selected colormap
        scale  : If scale is True, will map values to range 0-1. Not normally preferred.
                 Will be ignored if a singleton value is passed in (can't scale a single value).
                 If all values are identical (or no values are given) there is no range to
                 stretch, so the values are only shifted to 0 rather than divided by zero
        uint8  : If True, will map data to range 0-255 and cast as N.uint8

    Returns:
        A single color for singleton input, otherwise a list with one color per value.
        Colors are whatever the colormap returns, or N.uint8 arrays when uint8 is True.

    >>> import numpy as N

    Test basic usage

    >>> PP(map_values_to_rgba([0,1,2], palette = 'afmhot'))
    [(0.0, 0.0, 0.0, 1.0),
     (0.0078431372549019607, 0.0, 0.0, 1.0),
     (0.015686274509803921, 0.0, 0.0, 1.0)]

    Test basic usage with uint8

    >>> PP(map_values_to_rgba([0,1,2], palette = 'afmhot', uint8 = True))
    [array([  0,   0,   0, 255], dtype=uint8),
     array([  2,   0,   0, 255], dtype=uint8),
     array([  4,   0,   0, 255], dtype=uint8)]

    Test Int64 case

    >>> PP(map_values_to_rgba(N.array([0,1,2],dtype=N.int64), palette = 'afmhot'))
    [(0.0, 0.0, 0.0, 1.0),
     (0.0078431372549019607, 0.0, 0.0, 1.0),
     (0.015686274509803921, 0.0, 0.0, 1.0)]

    Custom colormap example

    >>> PP(map_values_to_rgba(N.linspace(0,1,10), palette = 'hot_framed' ))
    [(0.0, 0.0, 0.0, 1.0),
     (0.4392156862745098, 0.0, 0.0, 1.0),
     (0.8784313725490196, 0.0, 0.0, 1.0),
     (1.0, 0.16666666666666666, 0.0, 1.0),
     (1.0, 0.38627450980392158, 0.0, 1.0),
     (1.0, 0.61372549019607847, 0.0, 1.0),
     (1.0, 0.83333333333333326, 0.0, 1.0),
     (1.0, 1.0, 0.12156862745098039, 1.0),
     (1.0, 1.0, 0.5607843137254902, 1.0),
     (1.0, 1.0, 1.0, 1.0)]

    >>> PP(map_values_to_rgba(N.linspace(0,1,10), palette = 'gist_rainbow_r' ))
    [(1.0, 0.0, 0.75, 1.0),
     (0.65323955669224221, 0.0, 1.0, 1.0),
     (0.056479113384484192, 0.0, 1.0, 1.0),
     (0.0, 0.56159420289855067, 1.0, 1.0),
     (0.0, 1.0, 0.84334809192494209, 1.0),
     (0.0, 1.0, 0.23192072527935914, 1.0),
     (0.36036036036036029, 1.0, 0.0, 1.0),
     (0.97509273979862188, 1.0, 0.0, 1.0),
     (1.0, 0.43137254901960786, 0.0, 1.0),
     (1.0, 0.0, 0.16, 1.0)]

    Custom colormap example

    >>> PP(map_values_to_rgba(N.linspace(0,1,10), palette = 'cold_framed'))
    [(0.0, 0.0, 0.0, 1.0),
     (0.0, 0.0, 0.32941176470588235, 1.0),
     (0.0, 0.0, 0.6588235294117647, 1.0),
     (0.0, 0.0, 1.0, 1.0),
     (0.32941176470588235, 0.16470588235294117, 1.0, 1.0),
     (0.6705882352941176, 0.3352941176470588, 1.0, 1.0),
     (1.0, 0.5, 1.0, 1.0),
     (1.0, 0.32941176470588235, 1.0, 1.0),
     (1.0, 0.1647058823529412, 1.0, 1.0),
     (1.0, 0.0, 1.0, 1.0)]

    Singleton example

    >>> map_values_to_rgba(0.5, palette = 'cold_framed')
    (0.50588235294117645, 0.25294117647058822, 1.0, 1.0)

    Built in palette example

    >>> PP(map_values_to_rgba(N.linspace(0,1,10), palette = 'nipy_spectral'))
    [(0.0, 0.0, 0.0, 1.0),
     (0.42873137254901961, 0.0, 0.61307843137254903, 1.0),
     (0.0, 0.18301960784313726, 0.86670000000000003, 1.0),
     (0.0, 0.64446666666666663, 0.73336666666666672, 1.0),
     (0.0, 0.60915490196078426, 0.073198039215686239, 1.0),
     (0.0, 0.88499607843137262, 0.0, 1.0),
     (0.7999666666666666, 0.97776666666666667, 0.0, 1.0),
     (1.0, 0.67843137254901964, 0.0, 1.0),
     (0.89283725490196075, 0.0, 0.0, 1.0),
     (0.80000000000000004, 0.80000000000000004, 0.80000000000000004, 1.0)]

    Test scaling
    First pass in values only from 0 to 0.5

    >>> PP(map_values_to_rgba(N.linspace(0,0.5,10), palette = 'nipy_spectral'))
    [(0.0, 0.0, 0.0, 1.0),
     (0.4732294117647059, 0.0, 0.53983921568627447, 1.0),
     (0.42873137254901961, 0.0, 0.61307843137254903, 1.0),
     (0.0, 0.0, 0.72552352941176468, 1.0),
     (0.0, 0.18301960784313726, 0.86670000000000003, 1.0),
     (0.0, 0.54249803921568629, 0.86670000000000003, 1.0),
     (0.0, 0.64446666666666663, 0.73336666666666672, 1.0),
     (0.0, 0.66669999999999996, 0.5646882352941176, 1.0),
     (0.0, 0.60915490196078426, 0.073198039215686239, 1.0),
     (0.0, 0.73853137254901957, 0.0, 1.0)]

    Now pass them in again using scaling, will span to maximum colors available
    Will be equivalent to Built in palette example two entries above

    >>> PP(map_values_to_rgba(N.linspace(0,0.5,10), palette = 'nipy_spectral', scale = True))
    [(0.0, 0.0, 0.0, 1.0),
     (0.42873137254901961, 0.0, 0.61307843137254903, 1.0),
     (0.0, 0.18301960784313726, 0.86670000000000003, 1.0),
     (0.0, 0.64446666666666663, 0.73336666666666672, 1.0),
     (0.0, 0.60915490196078426, 0.073198039215686239, 1.0),
     (0.0, 0.88499607843137262, 0.0, 1.0),
     (0.7999666666666666, 0.97776666666666667, 0.0, 1.0),
     (1.0, 0.67843137254901964, 0.0, 1.0),
     (0.89283725490196075, 0.0, 0.0, 1.0),
     (0.80000000000000004, 0.80000000000000004, 0.80000000000000004, 1.0)]
    """
    singleton = is_not_iterable(values)
    if singleton:
        values = [values, ]

    if scale and not singleton:
        values = N.array(values, dtype=float)
        # Empty input has no min/max to compute, so there is nothing to rescale
        if values.size > 0:
            values -= N.min(values)
            span = float(N.max(values))
            # Constant input leaves a zero span; dividing would turn every value into NaN
            if span > 0:
                values /= span

    if palette is None:
        # NOTE(review): P.cm.cmapname is not an attribute this reviewer can confirm exists;
        # verify that the default-palette path works against the P module in use
        palette = P.cm.cmapname

    cmap = custom_colormap(palette)

    retval = [cmap(x) for x in values]

    if uint8:
        retval = [(N.array(x) * 255).astype(N.uint8) for x in retval]

    return retval[0] if singleton else retval
def tight_format_string(array):
    """
    Return formatting string appropriate to encapsulate the largest integer in a passed array.

    Args:
        array: a singleton integer, a sized collection of integers, or None

    Returns:
        A zero-padded "%0Nd" style format string wide enough for the largest magnitude
        present. When any value is negative a leading space flag is added and the width
        grows by one to leave room for the sign. None or empty input returns "%d".

    Raises:
        Exception: if the first element is not an integer

    Call without integer values

    >>> tight_format_string([1,5,0.1])
    Traceback (most recent call last):
    Exception: Makes no sense to call this for non-integer data

    Standard case

    >>> tight_format_string([1,4,10,100,])%10
    '010'
    >>> tight_format_string([1,4,10,100,])%100
    '100'
    >>> tight_format_string([1,4,10,100,])
    '%03d'

    Singleton case

    >>> tight_format_string(1)%1
    '1'
    >>> tight_format_string(1)
    '%01d'

    Zero case

    >>> tight_format_string(0)
    '%01d'

    Negative singleton

    >>> tight_format_string(-1)%(-1)
    '-1'
    >>> tight_format_string(-1)
    '% 02d'

    Negative integers

    >>> tight_format_string((-10, -100))%(-10)
    '-010'
    >>> tight_format_string((-10, -100))
    '% 04d'

    Empty case

    >>> tight_format_string(())
    '%d'

    Mixed integers

    >>> tight_format_string((-10,0,10))%0
    ' 00'
    >>> tight_format_string((-10,0,10))
    '% 03d'
    """
    if array is None:
        return "%d"

    from mlib.iterable import is_not_iterable
    if is_not_iterable(array):
        array = [array, ]

    if len(array) == 0:
        return "%d"

    newarr = N.array(array)

    # Only the first element is inspected to decide whether the data is integral
    if not mlib.mtypes.isint(newarr[0]):
        raise Exception('Makes no sense to call this for non-integer data')

    negative = (newarr < 0).any()
    maxer = N.max(N.abs(newarr))

    # Count digits textually: log10(0) is -inf, which cannot be converted to an int,
    # so an all-zero input would otherwise crash instead of needing one digit
    sizer = len(str(int(maxer)))
    if negative:
        # One extra column for the sign
        sizer += 1

    return "%" + (" " if negative else "") + "0" + "%dd" % (sizer)
def any_within(substrings, potential_strings):
    """
    Looks for any match of the substring array elements in each of the potential_strings.
    If any are found, returns True.

    Args:
        substrings       : a singleton substring or an iterable of substrings to look for
        potential_strings: a singleton string or an iterable of strings to search within

    Returns:
        A single boolean for a singleton potential_strings, otherwise a list holding one
        boolean per potential string.

    Try with single potential_string

    Basic use case positive

    >>> any_within(("LND_NA","LND_GL","SEA_GL"), "WL_SA_LND_NA_IN")
    True

    Basic use case negative

    >>> any_within(("LND_NA","LND_GL","SEA_GL"), "WL_SA_LAND_NA_IN")
    False

    If you don't provide substrings to seek, you fail

    >>> any_within((), "Hello")
    False

    One length list case

    >>> any_within(("one",), "this one")
    True

    Singleton-case

    >>> any_within ("one", "this one")
    True

    Now try with multiple potential_strings

    Basic use case positive

    >>> any_within(("LND_NA","LND_GL","SEA_GL"), ["WL_SA_LND_NA_IN", "WL_SA_LND_NA_IN"])
    [True, True]

    Basic use case negative

    >>> any_within(("LND_NA","LND_GL","SEA_GL"), ["WL_SA_LAND_NA_IN", "WL_SA_LAND_NA_IN"])
    [False, False]

    If you don't provide substrings to seek, you fail

    >>> any_within((), ["Hello","Hello"])
    [False, False]
    """
    needles = [substrings, ] if is_not_iterable(substrings) else substrings

    single_target = is_not_iterable(potential_strings)
    haystacks = [potential_strings, ] if single_target else potential_strings

    # any() stops at the first needle found, exactly one verdict per haystack
    verdicts = [
        any(needle in haystack for needle in needles) for haystack in haystacks
    ]

    return verdicts[0] if single_target else verdicts
def substitute_by_dictionary(instrings, sub_dict):
    """
    Perform a group substitution in (list of) string given a dictionary of replacements.

    Intelligently applies the largest possible matches first, and will not match into
    already replaced values.

    Keys are handed to re.finditer as-is, so regular expression metacharacters in a key
    are interpreted as a pattern rather than literally.

    Args:
        instrings: a singleton string or an iterable of strings
        sub_dict : dictionary mapping the text to find onto its replacement

    Returns:
        The substituted string for singleton input, otherwise a list of substituted strings.
        Empty strings are returned as empty strings.

    >>> substitute_by_dictionary(['hello there!',], {'hello': 'goodbye', 'there':'here'})
    ['goodbye here!']

    >>> substitute_by_dictionary([],{'one':'two'})
    []

    >>> substitute_by_dictionary("hello",{})
    'hello'

    >>> substitute_by_dictionary("hello",{'heck':'hell','hell':'heck','heck':'hell'})
    'hecko'

    >>> substitute_by_dictionary("",{'one':'two'})
    ''
    """
    import re
    import numpy as N
    from mlib.numeric import consecutive_boolean_region_ranges

    singleton = is_not_iterable(instrings)
    if singleton:
        instrings = [instrings, ]

    # General theory is to look for the longest subst strings first and mark them as taken
    # Moving from longest to shortest, proceed until all substitutions are planned
    # Then concatenate remaining regions with subbed areas in a single act.
    keys_by_length = sorted(sub_dict, key=lambda k: (len(k), k), reverse=True)

    final_strings = []
    for instring in instrings:
        # Nothing can be substituted in an empty string; handling it here also avoids
        # building a zero length mask, which Numpy would not treat as boolean
        if len(instring) == 0:
            final_strings.append("")
            continue

        matched = N.zeros(len(instring), dtype=bool)
        key_spans = {}

        for key in keys_by_length:
            # Discover where this key is present in the input string
            spans = [x.span() for x in re.finditer(key, instring)]
            # Remove any spans that have already been matched elsewhere
            spans = [x for x in spans if not matched[x[0]:x[1]].any()]
            for start, stop in spans:
                matched[start:stop] = True
                key_spans[start] = sub_dict[key]

        # Discover the spans for unmatched text (the ranges are used as inclusive of their end)
        for span in consecutive_boolean_region_ranges(~matched):
            key_spans[span[0]] = instring[span[0]:span[1] + 1]

        # Assemble the final string by stitching the pieces together in positional order
        final_strings.append("".join(key_spans[index] for index in sorted(key_spans)))

    if singleton:
        final_strings = final_strings[0]

    return final_strings